By this point in the analysis I have filtered the data using the Seurat Pipeline, and have identified cell types using the CHETAH pipeline.
library( here)
library( Seurat)
library( dplyr)
library( fs)
library(CHETAH)
## Warning: Package 'CHETAH' is deprecated and will be removed from Bioconductor
## version 3.16
library(gplots)
library(reactome.db)
library(ReactomePA)
library(org.Hs.eg.db)
library(dendextend)
library(genefu)
library(limma)
library(edgeR)
library(affy)
library(hgu133plus2.db)
library(WGCNA)
library(goseq)
setwd("/Volumes/archive/cancergeneticslab/MikBlack/JudyJewell/brca-scrnaseq")
source('heatmap.3_function.R')
# here::i_am("/brca-scrnaseq")
set_here("/Volumes/archive/cancergeneticslab/MikBlack/JudyJewell/brca-scrnaseq")
## File .here already exists in /Volumes/archive/cancergeneticslab/MikBlack/JudyJewell/brca-scrnaseq
here::dr_here()
## here() starts at /Volumes/archive/cancergeneticslab/MikBlack/JudyJewell/brca-scrnaseq.
## - This directory contains a file ".here"
## - Initial working directory: /Volumes/archive/cancergeneticslab/MikBlack/JudyJewell/brca-scrnaseq
## - Current working directory: /Volumes/archive/cancergeneticslab/MikBlack/JudyJewell/brca-scrnaseq
here()
## [1] "/Volumes/archive/cancergeneticslab/MikBlack/JudyJewell/brca-scrnaseq"
setwd("/Volumes/archive/cancergeneticslab/MikBlack/JudyJewell/brca-scrnaseq")
load(file = "Organised to CHETAH.R", verbose = T)
## Loading objects:
## counts1
## biokey1
## brcaDat1
## counts
## metadata
## patients
## cells
## top10
## all.genes
## input
## input_brcaDat1
In the above sections of code I just loaded the libraries necessary for my analysis and loaded in the post-filtering scRNAseq data.
chetah3 = brcaDat1@meta.data[["CHETAH"]]
chetah3[brcaDat1@meta.data[["CHETAH"]] == "Node1"] = brcaDat1@meta.data[["cellType"]][brcaDat1@meta.data[["CHETAH"]]=="Node1"]
table(chetah3, brcaDat1@meta.data[["cellType"]])
##
## chetah3 B_cell Cancer_cell Endothelial_cell Fibroblast Mast_cell
## B_cell 2952 0 0 0 0
## CAF 0 0 0 8780 0
## Cancer_cell 0 14 0 0 0
## CD4 T cell 2 0 0 0 0
## CD8 T cell 2 0 0 0 0
## Dendritic 0 0 0 0 0
## Endothelial 0 0 725 0 0
## Macrophage 0 0 0 0 0
## Mast 0 0 0 0 1
## Mast_cell 0 0 0 0 2
## Myeloid_cell 0 0 0 0 0
## Myofibroblast 0 0 0 174 0
## NK 0 0 0 0 0
## Node10 0 1 0 1072 0
## Node2 57 3 0 0 0
## Node3 13 3 0 0 0
## Node4 19 1 0 0 0
## Node5 7 5 0 0 0
## Node6 2 0 0 0 0
## Node7 0 0 0 0 1
## Node8 0 0 0 0 0
## Node9 0 2 94 1790 0
## pDC 0 0 0 0 0
## Plasma 235 0 0 0 0
## reg. T cell 0 0 0 0 0
## T_cell 0 0 0 0 0
## Unassigned 6759 30043 3477 6155 717
##
## chetah3 Myeloid_cell pDC T_cell
## B_cell 0 0 0
## CAF 0 0 0
## Cancer_cell 0 0 0
## CD4 T cell 0 0 892
## CD8 T cell 0 0 815
## Dendritic 15 0 0
## Endothelial 0 0 0
## Macrophage 113 0 0
## Mast 0 0 0
## Mast_cell 0 0 0
## Myeloid_cell 68 0 0
## Myofibroblast 0 0 0
## NK 0 0 158
## Node10 0 0 0
## Node2 0 0 1775
## Node3 2 0 5507
## Node4 0 0 694
## Node5 0 0 3522
## Node6 0 0 614
## Node7 13 0 0
## Node8 37 0 0
## Node9 0 0 0
## pDC 0 50 0
## Plasma 0 0 0
## reg. T cell 0 0 513
## T_cell 0 0 6440
## Unassigned 10809 655 27193
replace = brcaDat1@meta.data[["CHETAH"]]=="Node1"
chetah3[ replace ] = brcaDat1@meta.data[["cellType"]][ replace ]
replace = brcaDat1@meta.data[["CHETAH"]]=="Node2"
chetah3[ replace ] = brcaDat1@meta.data[["cellType"]][ replace ]
replace = brcaDat1@meta.data[["CHETAH"]]=="Node3"
chetah3[ replace ] = brcaDat1@meta.data[["cellType"]][ replace ]
replace = brcaDat1@meta.data[["CHETAH"]]=="Node4"
chetah3[ replace ] = brcaDat1@meta.data[["cellType"]][ replace ]
replace = brcaDat1@meta.data[["CHETAH"]]=="Node5"
chetah3[ replace ] = brcaDat1@meta.data[["cellType"]][ replace ]
replace = brcaDat1@meta.data[["CHETAH"]]=="Node6"
chetah3[ replace ] = brcaDat1@meta.data[["cellType"]][ replace ]
replace = brcaDat1@meta.data[["CHETAH"]]=="Node8"
chetah3[ replace ] = brcaDat1@meta.data[["cellType"]][ replace ]
replace = brcaDat1@meta.data[["CHETAH"]]=="Node9"
chetah3[ replace ] = brcaDat1@meta.data[["cellType"]][ replace ]
replace = brcaDat1@meta.data[["CHETAH"]]=="Node10"
chetah3[ replace ] = brcaDat1@meta.data[["cellType"]][ replace ]
replace = brcaDat1@meta.data[["CHETAH"]]=="Unassigned"
chetah3[ replace ] = brcaDat1@meta.data[["cellType"]][ replace ]
table(chetah3, brcaDat1@meta.data[["cellType"]])
##
## chetah3 B_cell Cancer_cell Endothelial_cell Fibroblast Mast_cell
## B_cell 9809 0 0 0 0
## CAF 0 0 0 8780 0
## Cancer_cell 0 30072 0 0 0
## CD4 T cell 2 0 0 0 0
## CD8 T cell 2 0 0 0 0
## Dendritic 0 0 0 0 0
## Endothelial 0 0 725 0 0
## Endothelial_cell 0 0 3571 0 0
## Fibroblast 0 0 0 9017 0
## Macrophage 0 0 0 0 0
## Mast 0 0 0 0 1
## Mast_cell 0 0 0 0 719
## Myeloid_cell 0 0 0 0 0
## Myofibroblast 0 0 0 174 0
## NK 0 0 0 0 0
## Node7 0 0 0 0 1
## pDC 0 0 0 0 0
## Plasma 235 0 0 0 0
## reg. T cell 0 0 0 0 0
## T_cell 0 0 0 0 0
##
## chetah3 Myeloid_cell pDC T_cell
## B_cell 0 0 0
## CAF 0 0 0
## Cancer_cell 0 0 0
## CD4 T cell 0 0 892
## CD8 T cell 0 0 815
## Dendritic 15 0 0
## Endothelial 0 0 0
## Endothelial_cell 0 0 0
## Fibroblast 0 0 0
## Macrophage 113 0 0
## Mast 0 0 0
## Mast_cell 0 0 0
## Myeloid_cell 10916 0 0
## Myofibroblast 0 0 0
## NK 0 0 158
## Node7 13 0 0
## pDC 0 705 0
## Plasma 0 0 0
## reg. T cell 0 0 513
## T_cell 0 0 45745
Here I matched the Bassez et al identified cell-types with the CHETAH-identified cell types. This is because the cell types identified by Bassez et al were not specific,rather in categories, so they do not necessarily align with those of CHETAH identification. However, CHETAH identification could not identify a large proportion of cells, so all of these uncategorised cells can now at least be associated with a category of cell type.
brcaDat1@meta.data$patient_trt = paste0(brcaDat1@meta.data$patient_id, "_", brcaDat1@meta.data$timepoint)
aggrCounts = AggregateExpression(brcaDat1, set = brcaDat1@meta.data$Cell, group.by = "patient_trt")
## The following functions and any applicable methods accept the dots: CreateSeuratObject
counts = aggrCounts[[1]]
dim(counts)
## [1] 25288 62
head(counts)
## BIOKEY_1_On BIOKEY_1_Pre BIOKEY_10_On BIOKEY_10_Pre BIOKEY_11_On
## A1BG 1900.38751 2420.4932 2037.5990 2090.1745 1381.90946
## A1BG-AS1 314.69457 424.6407 470.1492 498.3933 219.37918
## A2M 23268.83314 2686.2540 2666.0859 4816.8543 832.72719
## A2M-AS1 64.37906 200.7731 173.9043 115.5187 48.49831
## A4GALT 1510.14001 200.9704 145.5802 286.4946 34.93897
## AAAS 841.60844 1103.2443 1093.1193 1348.4628 479.36815
## BIOKEY_11_Pre BIOKEY_12_On BIOKEY_12_Pre BIOKEY_13_On BIOKEY_13_Pre
## A1BG 559.64175 1855.8546 3231.5822 326.62392 1715.9383
## A1BG-AS1 46.86798 228.2988 205.0674 30.22590 343.3174
## A2M 390.02989 1945.9009 5295.7009 2985.44388 7077.9538
## A2M-AS1 24.69787 145.4389 121.9377 17.88692 125.7846
## A4GALT 10.93204 161.8869 456.1433 175.20030 630.6713
## AAAS 312.24811 679.2446 874.5552 212.84814 821.0722
## BIOKEY_14_On BIOKEY_14_Pre BIOKEY_15_On BIOKEY_15_Pre BIOKEY_16_On
## A1BG 356.43841 1254.12969 1974.9635 1154.70181 1842.4072
## A1BG-AS1 24.04932 147.91064 337.1768 244.01799 272.2644
## A2M 2924.69116 3529.77582 2488.7363 3454.74353 6626.4567
## A2M-AS1 23.41485 63.36445 115.2525 39.16571 157.6789
## A4GALT 136.38584 362.48009 237.5187 260.08511 460.8066
## AAAS 131.35655 651.31124 865.1258 477.67563 720.9153
## BIOKEY_16_Pre BIOKEY_17_On BIOKEY_17_Pre BIOKEY_18_On BIOKEY_18_Pre
## A1BG 1801.9798 1689.3551 1247.2119 991.87062 460.38283
## A1BG-AS1 351.0143 182.0227 111.8949 146.67247 57.42074
## A2M 3195.9099 3507.5651 1904.2612 6879.32330 5202.23437
## A2M-AS1 157.2182 40.8649 54.8405 29.86681 25.79275
## A4GALT 123.1863 806.6421 251.6255 700.19699 196.93697
## AAAS 676.7908 336.4861 310.9628 605.52253 219.99570
## BIOKEY_19_On BIOKEY_19_Pre BIOKEY_2_On BIOKEY_2_Pre BIOKEY_20_On
## A1BG 863.48156 1542.6933 1976.77315 1558.08227 792.17530
## A1BG-AS1 84.36332 309.2133 162.32359 169.94594 122.26586
## A2M 3607.52909 11753.8181 7373.98091 7569.68694 6064.83994
## A2M-AS1 43.26117 131.6930 43.49986 29.03899 15.74553
## A4GALT 212.73272 441.0435 269.62462 105.57977 334.47803
## AAAS 312.03455 723.3303 534.54011 413.87989 494.24070
## BIOKEY_20_Pre BIOKEY_21_On BIOKEY_21_Pre BIOKEY_22_On BIOKEY_22_Pre
## A1BG 64.378561 1489.29558 878.24589 1398.0354 191.79383
## A1BG-AS1 12.292394 88.67642 98.07367 147.4568 17.03978
## A2M 1491.594254 2693.08371 2556.03826 2675.3610 1233.02681
## A2M-AS1 3.000855 18.21744 18.44090 53.9638 20.77072
## A4GALT 110.549904 1475.06842 332.45256 982.2869 111.21624
## AAAS 51.997246 643.38051 270.48765 676.9780 163.44320
## BIOKEY_23_On BIOKEY_23_Pre BIOKEY_24_On BIOKEY_24_Pre BIOKEY_25_On
## A1BG 2208.5338 76.48401 712.50143 653.02749 213.27953
## A1BG-AS1 292.7142 31.29229 73.92808 45.52037 33.37356
## A2M 6464.4014 533.28570 3933.47287 2117.44697 352.39940
## A2M-AS1 118.7594 10.26405 67.45770 18.72431 0.00000
## A4GALT 497.2741 35.67510 461.98096 169.50604 44.75861
## AAAS 546.7235 20.59312 269.69741 213.80754 82.39351
## BIOKEY_25_Pre BIOKEY_26_On BIOKEY_26_Pre BIOKEY_27_On BIOKEY_27_Pre
## A1BG 101.638815 5229.66247 857.58915 751.93290 1546.5327
## A1BG-AS1 6.409162 211.84285 52.28328 110.01495 226.2388
## A2M 648.846994 18311.18892 4039.69997 3863.13499 7730.4527
## A2M-AS1 0.000000 44.73858 33.09237 60.34836 125.1434
## A4GALT 48.911380 738.38370 87.17145 454.23169 638.9537
## AAAS 58.852298 505.28363 147.76195 258.14705 636.9252
## BIOKEY_28_On BIOKEY_28_Pre BIOKEY_29_On BIOKEY_29_Pre BIOKEY_3_On
## A1BG 1085.89258 372.01264 60.421630 652.40703 1702.58326
## A1BG-AS1 77.89455 92.53131 11.663869 44.64777 139.84319
## A2M 2271.96717 3227.92061 522.678685 1831.08341 2802.43695
## A2M-AS1 21.21864 16.34732 5.359943 14.15381 45.69168
## A4GALT 13.69600 153.57510 48.620618 115.80593 401.35488
## AAAS 232.72662 189.72427 35.980999 176.29975 358.86291
## BIOKEY_3_Pre BIOKEY_30_On BIOKEY_30_Pre BIOKEY_31_On BIOKEY_31_Pre
## A1BG 1870.5906 1167.76271 2039.68894 1983.15751 1186.54770
## A1BG-AS1 225.2130 105.42174 170.07499 199.62419 68.97513
## A2M 3040.0649 2919.04175 3159.96041 4374.99996 1165.70749
## A2M-AS1 37.2509 27.48489 22.51573 67.24141 12.15292
## A4GALT 290.6268 102.85704 96.79814 464.53299 426.35554
## AAAS 441.3157 406.16096 504.91820 485.53125 202.89897
## BIOKEY_4_On BIOKEY_4_Pre BIOKEY_5_On BIOKEY_5_Pre BIOKEY_6_On
## A1BG 994.603168 2179.06098 137.75410 1031.56107 839.92827
## A1BG-AS1 102.681087 293.93446 18.66668 164.84794 105.37274
## A2M 566.885365 1131.71116 343.50344 563.94215 5049.67645
## A2M-AS1 52.216702 72.76798 14.36486 135.32170 28.95516
## A4GALT 7.668021 87.94784 33.58809 34.20967 510.43514
## AAAS 504.467631 884.23504 29.61075 374.69285 482.93330
## BIOKEY_6_Pre BIOKEY_7_On BIOKEY_7_Pre BIOKEY_8_On BIOKEY_8_Pre
## A1BG 942.92485 1477.88713 209.93130 559.91279 138.94097
## A1BG-AS1 108.88995 113.41705 17.15663 74.87933 33.23953
## A2M 3731.40542 1403.38859 179.28400 229.90130 287.42814
## A2M-AS1 23.07205 44.48068 0.00000 45.09714 0.00000
## A4GALT 267.64798 388.33220 29.14525 11.44350 16.51448
## AAAS 553.78968 288.95222 55.61864 208.89668 63.90025
## BIOKEY_9_On BIOKEY_9_Pre
## A1BG 360.206896 968.90932
## A1BG-AS1 36.730616 100.41154
## A2M 1436.647896 7330.89666
## A2M-AS1 7.297468 29.82598
## A4GALT 78.491086 456.12146
## AAAS 106.699061 412.62730
dim(aggrCounts[[1]])
## [1] 25288 62
par(mar=c(11,5,2,0))
colSums(aggrCounts[[1]]) %>% sort() %>% barplot(., las=3, ylab = "Number of reads", cex.lab=1.2, cex.axis=1.2, cex=1)
mtext('Tumour sample', 1, 9, cex = 1.2)
# abline(h=3e6, col='blue', lty=3)
abline(h=3e6, col='blue', lty=3, lwd=1.3)
abline(h=5e6, col='blue', lty=3, lwd=1.3)
# Here I aggregated the count data across all cells for each sample, so that every gene has one representative expression level in each tumour. This makes the data mimic bulk RNA sequencing moreso than scRNAseq.
par(mar=c(11,5,2,0))
colSums(counts) %>% sort() %>% barplot(., las=3, ylab = "Number of reads", cex.lab=1.2, cex.axis=1.2, cex=1.2)
mtext('Tumour sample', 1, 9, cex = 1.2)
# abline(h=3e6, col='blue', lty=3)
abline(h=2e6, col='blue', lty=3, lwd=1.3)
# In the grph above, I plotted the total number of counts for each sample. The two horizontal lines represent cut-off thresholds of 3 million counts and 1 milion counts. Samples with low total counts may not be accurate and so must be disregarded. Howeve, disregarding all tumours with a total count of under 3 million removes almost a third of the data (19 samples as shown below), so instead I will remove any samples with less than 2 million counts.
(colSums(counts) < 3000000) %>% sum()
## [1] 5
colnames(counts)[colSums(counts) < 3000000] %>% t() %>% t()
## [,1]
## data[, 1]BIOKEY_20_Pre "BIOKEY_20_Pre"
## data[, 1]BIOKEY_23_Pre "BIOKEY_23_Pre"
## data[, 1]BIOKEY_25_Pre "BIOKEY_25_Pre"
## data[, 1]BIOKEY_29_On "BIOKEY_29_On"
## data[, 1]BIOKEY_7_Pre "BIOKEY_7_Pre"
head(counts)
## BIOKEY_1_On BIOKEY_1_Pre BIOKEY_10_On BIOKEY_10_Pre BIOKEY_11_On
## A1BG 1900.38751 2420.4932 2037.5990 2090.1745 1381.90946
## A1BG-AS1 314.69457 424.6407 470.1492 498.3933 219.37918
## A2M 23268.83314 2686.2540 2666.0859 4816.8543 832.72719
## A2M-AS1 64.37906 200.7731 173.9043 115.5187 48.49831
## A4GALT 1510.14001 200.9704 145.5802 286.4946 34.93897
## AAAS 841.60844 1103.2443 1093.1193 1348.4628 479.36815
## BIOKEY_11_Pre BIOKEY_12_On BIOKEY_12_Pre BIOKEY_13_On BIOKEY_13_Pre
## A1BG 559.64175 1855.8546 3231.5822 326.62392 1715.9383
## A1BG-AS1 46.86798 228.2988 205.0674 30.22590 343.3174
## A2M 390.02989 1945.9009 5295.7009 2985.44388 7077.9538
## A2M-AS1 24.69787 145.4389 121.9377 17.88692 125.7846
## A4GALT 10.93204 161.8869 456.1433 175.20030 630.6713
## AAAS 312.24811 679.2446 874.5552 212.84814 821.0722
## BIOKEY_14_On BIOKEY_14_Pre BIOKEY_15_On BIOKEY_15_Pre BIOKEY_16_On
## A1BG 356.43841 1254.12969 1974.9635 1154.70181 1842.4072
## A1BG-AS1 24.04932 147.91064 337.1768 244.01799 272.2644
## A2M 2924.69116 3529.77582 2488.7363 3454.74353 6626.4567
## A2M-AS1 23.41485 63.36445 115.2525 39.16571 157.6789
## A4GALT 136.38584 362.48009 237.5187 260.08511 460.8066
## AAAS 131.35655 651.31124 865.1258 477.67563 720.9153
## BIOKEY_16_Pre BIOKEY_17_On BIOKEY_17_Pre BIOKEY_18_On BIOKEY_18_Pre
## A1BG 1801.9798 1689.3551 1247.2119 991.87062 460.38283
## A1BG-AS1 351.0143 182.0227 111.8949 146.67247 57.42074
## A2M 3195.9099 3507.5651 1904.2612 6879.32330 5202.23437
## A2M-AS1 157.2182 40.8649 54.8405 29.86681 25.79275
## A4GALT 123.1863 806.6421 251.6255 700.19699 196.93697
## AAAS 676.7908 336.4861 310.9628 605.52253 219.99570
## BIOKEY_19_On BIOKEY_19_Pre BIOKEY_2_On BIOKEY_2_Pre BIOKEY_20_On
## A1BG 863.48156 1542.6933 1976.77315 1558.08227 792.17530
## A1BG-AS1 84.36332 309.2133 162.32359 169.94594 122.26586
## A2M 3607.52909 11753.8181 7373.98091 7569.68694 6064.83994
## A2M-AS1 43.26117 131.6930 43.49986 29.03899 15.74553
## A4GALT 212.73272 441.0435 269.62462 105.57977 334.47803
## AAAS 312.03455 723.3303 534.54011 413.87989 494.24070
## BIOKEY_20_Pre BIOKEY_21_On BIOKEY_21_Pre BIOKEY_22_On BIOKEY_22_Pre
## A1BG 64.378561 1489.29558 878.24589 1398.0354 191.79383
## A1BG-AS1 12.292394 88.67642 98.07367 147.4568 17.03978
## A2M 1491.594254 2693.08371 2556.03826 2675.3610 1233.02681
## A2M-AS1 3.000855 18.21744 18.44090 53.9638 20.77072
## A4GALT 110.549904 1475.06842 332.45256 982.2869 111.21624
## AAAS 51.997246 643.38051 270.48765 676.9780 163.44320
## BIOKEY_23_On BIOKEY_23_Pre BIOKEY_24_On BIOKEY_24_Pre BIOKEY_25_On
## A1BG 2208.5338 76.48401 712.50143 653.02749 213.27953
## A1BG-AS1 292.7142 31.29229 73.92808 45.52037 33.37356
## A2M 6464.4014 533.28570 3933.47287 2117.44697 352.39940
## A2M-AS1 118.7594 10.26405 67.45770 18.72431 0.00000
## A4GALT 497.2741 35.67510 461.98096 169.50604 44.75861
## AAAS 546.7235 20.59312 269.69741 213.80754 82.39351
## BIOKEY_25_Pre BIOKEY_26_On BIOKEY_26_Pre BIOKEY_27_On BIOKEY_27_Pre
## A1BG 101.638815 5229.66247 857.58915 751.93290 1546.5327
## A1BG-AS1 6.409162 211.84285 52.28328 110.01495 226.2388
## A2M 648.846994 18311.18892 4039.69997 3863.13499 7730.4527
## A2M-AS1 0.000000 44.73858 33.09237 60.34836 125.1434
## A4GALT 48.911380 738.38370 87.17145 454.23169 638.9537
## AAAS 58.852298 505.28363 147.76195 258.14705 636.9252
## BIOKEY_28_On BIOKEY_28_Pre BIOKEY_29_On BIOKEY_29_Pre BIOKEY_3_On
## A1BG 1085.89258 372.01264 60.421630 652.40703 1702.58326
## A1BG-AS1 77.89455 92.53131 11.663869 44.64777 139.84319
## A2M 2271.96717 3227.92061 522.678685 1831.08341 2802.43695
## A2M-AS1 21.21864 16.34732 5.359943 14.15381 45.69168
## A4GALT 13.69600 153.57510 48.620618 115.80593 401.35488
## AAAS 232.72662 189.72427 35.980999 176.29975 358.86291
## BIOKEY_3_Pre BIOKEY_30_On BIOKEY_30_Pre BIOKEY_31_On BIOKEY_31_Pre
## A1BG 1870.5906 1167.76271 2039.68894 1983.15751 1186.54770
## A1BG-AS1 225.2130 105.42174 170.07499 199.62419 68.97513
## A2M 3040.0649 2919.04175 3159.96041 4374.99996 1165.70749
## A2M-AS1 37.2509 27.48489 22.51573 67.24141 12.15292
## A4GALT 290.6268 102.85704 96.79814 464.53299 426.35554
## AAAS 441.3157 406.16096 504.91820 485.53125 202.89897
## BIOKEY_4_On BIOKEY_4_Pre BIOKEY_5_On BIOKEY_5_Pre BIOKEY_6_On
## A1BG 994.603168 2179.06098 137.75410 1031.56107 839.92827
## A1BG-AS1 102.681087 293.93446 18.66668 164.84794 105.37274
## A2M 566.885365 1131.71116 343.50344 563.94215 5049.67645
## A2M-AS1 52.216702 72.76798 14.36486 135.32170 28.95516
## A4GALT 7.668021 87.94784 33.58809 34.20967 510.43514
## AAAS 504.467631 884.23504 29.61075 374.69285 482.93330
## BIOKEY_6_Pre BIOKEY_7_On BIOKEY_7_Pre BIOKEY_8_On BIOKEY_8_Pre
## A1BG 942.92485 1477.88713 209.93130 559.91279 138.94097
## A1BG-AS1 108.88995 113.41705 17.15663 74.87933 33.23953
## A2M 3731.40542 1403.38859 179.28400 229.90130 287.42814
## A2M-AS1 23.07205 44.48068 0.00000 45.09714 0.00000
## A4GALT 267.64798 388.33220 29.14525 11.44350 16.51448
## AAAS 553.78968 288.95222 55.61864 208.89668 63.90025
## BIOKEY_9_On BIOKEY_9_Pre
## A1BG 360.206896 968.90932
## A1BG-AS1 36.730616 100.41154
## A2M 1436.647896 7330.89666
## A2M-AS1 7.297468 29.82598
## A4GALT 78.491086 456.12146
## AAAS 106.699061 412.62730
counts = counts[,-c(25:26,31:32,35:36,43:44,57:58)]
dim(counts)
## [1] 25288 52
# I thus removed these 2 samples and their pre/on treatment counterparts.
#table(brcaDat1@meta.data$cellType)
#table(brcaDat1@meta.data$patient_id, brcaDat1@meta.data$cellType)
#prop.table(table(brcaDat1@meta.data$patient_id, brcaDat1@meta.data$cellType),1) %>% round(.,3)
#boxplot(prop.table(table(brcaDat1@meta.data$CHETAH, brcaDat1@meta.data$patient_id),1) %>% round(.,3))
# boxplot (brcaDat1@meta.data$CHETAH, brcaDat1@meta.data$patient_id)
# hist(log10(brcaDat1@meta.data$nCount_RNA), 100)
dim(brcaDat1)
## [1] 25288 122993
head(brcaDat1)
## orig.ident nCount_RNA nFeature_RNA
## BIOKEY_13_Pre_AAACCTGCAACAACCT-1 brca 684 430
## BIOKEY_13_Pre_AAACCTGCAAGAAGAG-1 brca 1252 700
## BIOKEY_13_Pre_AAACCTGGTCTCCACT-1 brca 522 330
## BIOKEY_13_Pre_AAACCTGTCAACGAAA-1 brca 8454 2637
## BIOKEY_13_Pre_AAACGGGCACAGAGGT-1 brca 1908 916
## BIOKEY_13_Pre_AAACGGGCATGGGACA-1 brca 5652 2290
## BIOKEY_13_Pre_AAACGGGGTTCATGGT-1 brca 1779 860
## BIOKEY_13_Pre_AAACGGGTCAACGAAA-1 brca 626 211
## BIOKEY_13_Pre_AAACGGGTCACCAGGC-1 brca 5873 2248
## BIOKEY_13_Pre_AAACGGGTCCGAACGC-1 brca 742 424
## Cell patient_id
## BIOKEY_13_Pre_AAACCTGCAACAACCT-1 BIOKEY_13_Pre_AAACCTGCAACAACCT-1 BIOKEY_13
## BIOKEY_13_Pre_AAACCTGCAAGAAGAG-1 BIOKEY_13_Pre_AAACCTGCAAGAAGAG-1 BIOKEY_13
## BIOKEY_13_Pre_AAACCTGGTCTCCACT-1 BIOKEY_13_Pre_AAACCTGGTCTCCACT-1 BIOKEY_13
## BIOKEY_13_Pre_AAACCTGTCAACGAAA-1 BIOKEY_13_Pre_AAACCTGTCAACGAAA-1 BIOKEY_13
## BIOKEY_13_Pre_AAACGGGCACAGAGGT-1 BIOKEY_13_Pre_AAACGGGCACAGAGGT-1 BIOKEY_13
## BIOKEY_13_Pre_AAACGGGCATGGGACA-1 BIOKEY_13_Pre_AAACGGGCATGGGACA-1 BIOKEY_13
## BIOKEY_13_Pre_AAACGGGGTTCATGGT-1 BIOKEY_13_Pre_AAACGGGGTTCATGGT-1 BIOKEY_13
## BIOKEY_13_Pre_AAACGGGTCAACGAAA-1 BIOKEY_13_Pre_AAACGGGTCAACGAAA-1 BIOKEY_13
## BIOKEY_13_Pre_AAACGGGTCACCAGGC-1 BIOKEY_13_Pre_AAACGGGTCACCAGGC-1 BIOKEY_13
## BIOKEY_13_Pre_AAACGGGTCCGAACGC-1 BIOKEY_13_Pre_AAACGGGTCCGAACGC-1 BIOKEY_13
## timepoint expansion BC_type cellType
## BIOKEY_13_Pre_AAACCTGCAACAACCT-1 Pre n/a HER2+ Myeloid_cell
## BIOKEY_13_Pre_AAACCTGCAAGAAGAG-1 Pre n/a HER2+ T_cell
## BIOKEY_13_Pre_AAACCTGGTCTCCACT-1 Pre n/a HER2+ pDC
## BIOKEY_13_Pre_AAACCTGTCAACGAAA-1 Pre n/a HER2+ Myeloid_cell
## BIOKEY_13_Pre_AAACGGGCACAGAGGT-1 Pre n/a HER2+ Myeloid_cell
## BIOKEY_13_Pre_AAACGGGCATGGGACA-1 Pre n/a HER2+ Fibroblast
## BIOKEY_13_Pre_AAACGGGGTTCATGGT-1 Pre n/a HER2+ Endothelial_cell
## BIOKEY_13_Pre_AAACGGGTCAACGAAA-1 Pre n/a HER2+ B_cell
## BIOKEY_13_Pre_AAACGGGTCACCAGGC-1 Pre n/a HER2+ Fibroblast
## BIOKEY_13_Pre_AAACGGGTCCGAACGC-1 Pre n/a HER2+ Cancer_cell
## cohort percent.mt RNA_snn_res.0.5
## BIOKEY_13_Pre_AAACCTGCAACAACCT-1 treatment_naive 2.4853801 9
## BIOKEY_13_Pre_AAACCTGCAAGAAGAG-1 treatment_naive 4.0734824 0
## BIOKEY_13_Pre_AAACCTGGTCTCCACT-1 treatment_naive 0.7662835 7
## BIOKEY_13_Pre_AAACCTGTCAACGAAA-1 treatment_naive 4.2228531 9
## BIOKEY_13_Pre_AAACGGGCACAGAGGT-1 treatment_naive 5.9224319 4
## BIOKEY_13_Pre_AAACGGGCATGGGACA-1 treatment_naive 3.8747346 3
## BIOKEY_13_Pre_AAACGGGGTTCATGGT-1 treatment_naive 7.0264193 18
## BIOKEY_13_Pre_AAACGGGTCAACGAAA-1 treatment_naive 0.1597444 12
## BIOKEY_13_Pre_AAACGGGTCACCAGGC-1 treatment_naive 5.3635280 3
## BIOKEY_13_Pre_AAACGGGTCCGAACGC-1 treatment_naive 2.4258760 0
## seurat_clusters CHETAH patient_trt
## BIOKEY_13_Pre_AAACCTGCAACAACCT-1 9 Unassigned BIOKEY_13_Pre
## BIOKEY_13_Pre_AAACCTGCAAGAAGAG-1 0 Unassigned BIOKEY_13_Pre
## BIOKEY_13_Pre_AAACCTGGTCTCCACT-1 7 Unassigned BIOKEY_13_Pre
## BIOKEY_13_Pre_AAACCTGTCAACGAAA-1 9 Unassigned BIOKEY_13_Pre
## BIOKEY_13_Pre_AAACGGGCACAGAGGT-1 4 Unassigned BIOKEY_13_Pre
## BIOKEY_13_Pre_AAACGGGCATGGGACA-1 3 CAF BIOKEY_13_Pre
## BIOKEY_13_Pre_AAACGGGGTTCATGGT-1 18 Unassigned BIOKEY_13_Pre
## BIOKEY_13_Pre_AAACGGGTCAACGAAA-1 12 Unassigned BIOKEY_13_Pre
## BIOKEY_13_Pre_AAACGGGTCACCAGGC-1 3 CAF BIOKEY_13_Pre
## BIOKEY_13_Pre_AAACGGGTCCGAACGC-1 0 Unassigned BIOKEY_13_Pre
# table(brcaDat1@meta.data$CHETAH, brcaDat1@meta.data$cellType)
# brcaDat1 = brcaDat1[-c(brcaDat1$patient_id=="BIOKEY_23_PRE", brcaDat1$patient_id=="BIOKEY_23_ON", brcaDat1$patient_id=="BIOKEY_29_PRE", brcaDat1$patient_id=="BIOKEY_29_ON"),]
# brcaDat1 = brcaDat1[,-c(brcaDat1$patient_id=="BIOKEY_23", brcaDat1$patient_id=="BIOKEY_29")]
# dim(brcaDat1)
# table(brcaDat1$patient_id)
# nCount_RNA = total number of molecules detected within a cell. This data shows a two-peak distribution of data.
# hist(brcaDat1@meta.data$nCount_RNA/brcaDat1@meta.data$nFeature_RNA, 100)
# boxplot((brcaDat1@meta.data$nCount_RNA/brcaDat1@meta.data$nFeature_RNA)~brcaDat1@meta.data$patient_id)
I cut off the bottom 6 tumours, because they have a low read information. These tumours are: BIOKEY_20_Pre, BIOKEY_23_Pre, BIOKEY_25_Pre, BIOKEY_29_On, BIOKEY_5_On, and BIOKEY_7_Pre. I assume that I should also remove their respective counterparts (pre- and on- data relating to that same tumour).
dge = DGEList( counts = counts)
dge = calcNormFactors(dge)
logcpm = edgeR::cpm(dge, log = TRUE, prior.counts = 3)
design = model.matrix(~rep(1, ncol(counts)) - 1)
v = voom(dge, design = design, plot = TRUE)
expDat = v$E
expDat[1:5,1:5]
## BIOKEY_1_On BIOKEY_1_Pre BIOKEY_10_On BIOKEY_10_Pre BIOKEY_11_On
## A1BG 5.6562549 5.572602 5.389627 5.386081 5.9465599
## A1BG-AS1 3.0638964 3.063015 3.275127 3.318916 3.2941579
## A2M 9.2699397 5.722867 5.777397 6.590354 5.2161568
## A2M-AS1 0.7834785 1.984229 1.842910 1.214542 1.1282508
## A4GALT 5.3247417 1.985643 1.587235 2.521208 0.6608555
head(expDat)
## BIOKEY_1_On BIOKEY_1_Pre BIOKEY_10_On BIOKEY_10_Pre BIOKEY_11_On
## A1BG 5.6562549 5.572602 5.389627 5.386081 5.9465599
## A1BG-AS1 3.0638964 3.063015 3.275127 3.318916 3.2941579
## A2M 9.2699397 5.722867 5.777397 6.590354 5.2161568
## A2M-AS1 0.7834785 1.984229 1.842910 1.214542 1.1282508
## A4GALT 5.3247417 1.985643 1.587235 2.521208 0.6608555
## AAAS 4.4816597 4.439409 4.491514 4.753963 4.4200849
## BIOKEY_11_Pre BIOKEY_12_On BIOKEY_12_Pre BIOKEY_13_On BIOKEY_13_Pre
## A1BG 5.4943341 5.876692 6.558254 5.372207 5.767321
## A1BG-AS1 1.9305263 2.856371 2.583474 1.959897 3.447617
## A2M 4.9739751 5.945029 7.270748 8.562485 7.811337
## A2M-AS1 1.0199158 2.207656 1.835910 1.219121 2.002654
## A4GALT -0.1203027 2.361727 3.734930 4.475486 4.324006
## AAAS 4.6535433 4.427284 4.673236 4.755579 4.704362
## BIOKEY_14_On BIOKEY_14_Pre BIOKEY_15_On BIOKEY_15_Pre BIOKEY_16_On
## A1BG 5.491218 5.644349 5.659285 5.3438729 5.169531
## A1BG-AS1 1.629298 2.564754 3.110809 3.1037405 2.413275
## A2M 8.526002 7.136868 5.992797 6.9245154 7.015895
## A2M-AS1 1.591521 1.348244 1.566209 0.4797642 1.627177
## A4GALT 4.108516 3.855050 2.606241 3.1955546 3.171346
## AAAS 4.054512 4.699614 4.468909 4.0713405 3.816450
## BIOKEY_16_Pre BIOKEY_17_On BIOKEY_17_Pre BIOKEY_18_On BIOKEY_18_Pre
## A1BG 5.429984 6.398167 6.540137 5.2725695 5.588972
## A1BG-AS1 3.071657 3.187415 3.067500 2.5191983 2.596724
## A2M 6.256454 7.451943 7.150463 8.0659901 9.085770
## A2M-AS1 1.915423 1.045818 2.045332 0.2422577 1.457309
## A4GALT 1.564759 5.332162 4.233066 4.7704812 4.365964
## AAAS 4.017848 4.072028 4.537984 4.5610619 4.525323
## BIOKEY_19_On BIOKEY_19_Pre BIOKEY_2_On BIOKEY_2_Pre BIOKEY_21_On
## A1BG 5.872312 5.373995 6.3601808 6.3617116 6.1896793
## A1BG-AS1 2.524524 3.057081 2.7580502 3.1688635 2.1273710
## A2M 7.934450 8.303195 8.2592101 8.6418063 7.0440918
## A2M-AS1 1.569034 1.828793 0.8703114 0.6402419 -0.1249084
## A4GALT 3.853740 3.568703 3.4883656 2.4846987 6.1758357
## AAAS 4.405327 4.281799 4.4743875 4.4505032 4.9794298
## BIOKEY_21_Pre BIOKEY_22_On BIOKEY_22_Pre BIOKEY_24_On BIOKEY_24_Pre
## A1BG 6.2284215 6.119874 5.122621 5.749911 5.9290209
## A1BG-AS1 3.0722496 2.879205 1.668012 2.489925 2.1011174
## A2M 7.7690951 7.055960 7.804026 8.213921 7.6253675
## A2M-AS1 0.6925521 1.437398 1.946249 2.358715 0.8417764
## A4GALT 4.8282920 5.610908 4.339148 5.125400 3.9863590
## AAAS 4.5312025 5.074203 4.892505 4.350020 4.3204556
## BIOKEY_26_On BIOKEY_26_Pre BIOKEY_27_On BIOKEY_27_Pre BIOKEY_28_On
## A1BG 7.384139 6.281395 5.988802 6.008534 6.4364013
## A1BG-AS1 2.761751 2.258421 3.221481 3.238134 2.6437530
## A2M 9.191977 8.516622 8.349126 8.329677 7.5011152
## A2M-AS1 0.530981 1.606473 2.360529 2.386437 0.7919333
## A4GALT 4.560703 2.990447 5.262255 4.733943 0.1784844
## AAAS 4.013876 3.748416 4.448224 4.729359 4.2166601
## BIOKEY_28_Pre BIOKEY_3_On BIOKEY_3_Pre BIOKEY_30_On BIOKEY_30_Pre
## A1BG 5.1438604 6.527264 6.3610656 6.2089421 6.6789158
## A1BG-AS1 3.1423593 2.926146 3.3097476 2.7456480 3.0986911
## A2M 8.2593277 7.246054 7.0615256 7.5303193 7.3103478
## A2M-AS1 0.6771622 1.322892 0.7298409 0.8253694 0.2089768
## A4GALT 3.8702047 4.443861 3.6769058 2.7102859 2.2887690
## AAAS 4.1742724 4.282628 4.2787026 4.6864758 4.6657626
## BIOKEY_31_On BIOKEY_31_Pre BIOKEY_4_On BIOKEY_4_Pre BIOKEY_5_On
## A1BG 6.018702 6.2703980 5.564463 5.927756 5.719734
## A1BG-AS1 2.709506 2.1756605 2.294796 3.039737 2.869084
## A2M 7.159987 6.2448446 4.753946 4.982861 7.034835
## A2M-AS1 1.146721 -0.2813656 1.325949 1.033045 2.502390
## A4GALT 3.925944 4.7948399 -1.364252 1.304689 3.699751
## AAAS 3.989660 3.7254043 4.585808 4.627035 3.520763
## BIOKEY_5_Pre BIOKEY_6_On BIOKEY_6_Pre BIOKEY_8_On BIOKEY_8_Pre
## A1BG 5.6010827 5.8228518 5.35769515 5.7145084 5.171955
## A1BG-AS1 2.9591313 2.8340583 2.24926771 2.8202593 3.124813
## A2M 4.7304519 8.4099890 7.34162807 4.4321673 6.218010
## A2M-AS1 2.6753402 0.9883196 0.03493685 2.0950336 -2.951555
## A4GALT 0.7070359 5.1048672 3.54281694 0.1623155 2.137136
## AAAS 4.1412585 5.0250439 4.59042770 4.2942570 4.057439
## BIOKEY_9_On BIOKEY_9_Pre
## A1BG 6.075657 5.3941316
## A1BG-AS1 2.799392 2.1301168
## A2M 8.069967 8.3130418
## A2M-AS1 0.543980 0.3956518
## A4GALT 3.884592 4.3080243
## AAAS 4.325123 4.1636121
Here I put the scRNAseq data through voom normalisation. The “voom”
function estimates the relationship between the mean and the variance of
the logCPM data, normalises the data, and creates “precision weights”
for each observation that are incorporated into the limma analysis. From
this voom normalisation, I created an expDat expression
object.
setwd("/Volumes/archive/cancergeneticslab/MikBlack/JudyJewell/brca-scrnaseq")
load("expdatpreandon.RData", verbose = T)
## Loading objects:
## expDaton
## expDatpre
head(expDatpre)
## T_BIOKEY_1_Pre T_BIOKEY_10_Pre T_BIOKEY_11_Pre E_BIOKEY_12_Pre
## A1BG 5.516145 5.431743 5.413533 6.9626246
## A1BG-AS1 2.997086 3.239654 1.691761 1.3996873
## A2M 4.539965 6.134018 4.152077 6.5496177
## A2M-AS1 1.479498 1.143693 1.200933 0.7116221
## A4GALT 1.460347 2.455938 -3.898274 3.1029785
## AAAS 4.371725 4.656356 4.620697 4.8215507
## H_BIOKEY_13_Pre T_BIOKEY_14_Pre T_BIOKEY_15_Pre T_BIOKEY_16_Pre
## A1BG 5.694850 5.312122 5.1150936 5.472128
## A1BG-AS1 3.399908 1.696770 3.0854935 3.065144
## A2M 7.333523 6.722100 6.3072421 4.678757
## A2M-AS1 1.586530 1.469455 -0.8459435 2.058809
## A4GALT 3.863119 3.870668 2.6443854 1.143593
## AAAS 4.548070 4.634678 4.1542320 4.100351
## E_BIOKEY_17_Pre E_BIOKEY_18_Pre T_BIOKEY_19_Pre T_BIOKEY_2_Pre
## A1BG 6.9725245 5.8004299 5.199503 6.6866931
## A1BG-AS1 1.7868431 2.6129625 3.130837 3.4713497
## A2M 6.4869919 8.2738843 7.953555 8.8882832
## A2M-AS1 0.8626157 0.4420466 1.472353 -0.1811935
## A4GALT 3.7296845 4.2644559 3.166750 1.6956410
## AAAS 4.3232065 5.3965006 4.022851 4.0371876
## E_BIOKEY_20_Pre E_BIOKEY_21_Pre E_BIOKEY_22_Pre H_BIOKEY_23_Pre
## A1BG 1.176529 6.5114793 5.555067 2.8795228
## A1BG-AS1 1.176529 3.3055763 2.045092 5.6808534
## A2M 8.690956 6.8709672 7.109675 7.3724163
## A2M-AS1 1.176529 -0.3552621 1.918203 -0.1942732
## A4GALT 1.176529 5.0117301 3.622746 4.2105114
## AAAS 3.730991 4.0803646 4.632077 3.2842698
## E_BIOKEY_24_Pre T_BIOKEY_25_Pre T_BIOKEY_26_Pre E_BIOKEY_27_Pre
## A1BG 6.3196452 5.3433102 6.243258 5.9996325
## A1BG-AS1 0.7626526 -0.2560209 2.482995 3.5492414
## A2M 6.7106390 5.2666285 8.619317 7.8014330
## A2M-AS1 -2.0029806 -0.2560209 1.182056 0.9737809
## A4GALT 3.4753680 -0.2560209 2.969126 4.4407770
## AAAS 4.2134547 -0.2560209 3.654162 4.7984305
## H_BIOKEY_28_Pre E_BIOKEY_29_Pre E_BIOKEY_3_Pre E_BIOKEY_30_Pre
## A1BG 5.168043 6.787897 6.2232016 6.122288
## A1BG-AS1 3.374182 2.672277 3.8918174 2.807408
## A2M 7.392796 6.954365 6.3955437 7.712788
## A2M-AS1 -2.270948 1.015733 0.7360115 2.509067
## A4GALT 2.815316 3.937832 3.5063148 0.719702
## AAAS 4.445411 5.104811 3.8882224 5.323493
## T_BIOKEY_31_Pre E_BIOKEY_4_Pre E_BIOKEY_5_Pre E_BIOKEY_6_Pre
## A1BG 6.657552 5.7312825 5.7578539 5.6438626
## A1BG-AS1 2.181624 2.8505131 2.7643786 -2.7281067
## A2M 6.274596 3.5410057 0.9604585 6.5644885
## A2M-AS1 -1.681436 -0.4571456 1.4749382 -0.5827321
## A4GALT 5.316876 0.8842042 -1.6077021 3.1398225
## AAAS 3.806920 4.4301778 3.8430954 4.5231817
## E_BIOKEY_7_Pre T_BIOKEY_8_Pre T_BIOKEY_9_Pre
## A1BG 6.2737109 5.1780177 5.184807
## A1BG-AS1 3.4544844 2.9421822 2.317415
## A2M 5.0657361 3.3681097 8.081296
## A2M-AS1 -0.1963423 -2.2277830 -0.209915
## A4GALT 2.7600000 -0.5087031 4.325937
## AAAS 4.3550455 3.8162667 4.127680
head(expDaton)
## T_BIOKEY_1_On T_BIOKEY_10_On T_BIOKEY_11_On E_BIOKEY_12_On
## A1BG 5.593694 5.480948 5.8927430 5.967722
## A1BG-AS1 2.519558 2.917420 3.5706021 2.373584
## A2M 8.371064 5.447220 3.8187982 4.274771
## A2M-AS1 1.650927 1.130577 0.2677720 1.728762
## A4GALT 4.643174 1.566061 -0.4592697 1.957988
## AAAS 4.333861 4.284210 4.2789935 4.397291
## H_BIOKEY_13_On T_BIOKEY_14_On T_BIOKEY_15_On T_BIOKEY_16_On
## A1BG 5.6831548 5.661959 5.682286 5.184911
## A1BG-AS1 1.5285815 1.118120 3.205944 2.382728
## A2M 7.4582249 8.077234 5.691263 6.739329
## A2M-AS1 0.5827826 -2.846790 1.558496 1.426495
## A4GALT 3.8765917 4.213639 2.203444 3.246004
## AAAS 4.7713310 4.329493 4.509878 3.815115
## E_BIOKEY_17_On E_BIOKEY_18_On T_BIOKEY_19_On T_BIOKEY_2_On
## A1BG 6.300132 5.043385 5.602146 6.4209985
## A1BG-AS1 3.645268 1.661507 3.274227 2.7406479
## A2M 6.450069 7.836474 6.361894 8.7286273
## A2M-AS1 2.261176 0.367614 -1.371798 -0.8633119
## A4GALT 3.680405 4.865045 3.824435 2.8001224
## AAAS 2.897744 4.736787 1.971860 4.4909923
## E_BIOKEY_20_On E_BIOKEY_21_On E_BIOKEY_22_On H_BIOKEY_23_On
## A1BG 5.2249578 6.3920409 6.095602 6.7085634
## A1BG-AS1 2.6145721 1.8623147 3.344646 4.1013221
## A2M 7.6013998 6.0589374 6.107070 7.6700494
## A2M-AS1 -0.1537773 0.4856339 1.648652 0.7017416
## A4GALT 3.4599911 6.1876960 5.677838 4.0548416
## AAAS 4.4096361 4.7629929 5.098202 4.4447414
## E_BIOKEY_24_On T_BIOKEY_25_On T_BIOKEY_26_On E_BIOKEY_27_On
## A1BG 5.845190 5.859633 7.4124501 6.431447
## A1BG-AS1 3.426896 4.390649 2.8426537 3.598911
## A2M 8.035704 6.192754 9.1257901 7.543428
## A2M-AS1 2.558571 -1.156028 0.5764085 -1.482937
## A4GALT 4.258148 4.277467 4.6071349 5.377628
## AAAS 5.315897 4.167467 3.9125363 4.866365
## H_BIOKEY_28_On E_BIOKEY_29_On E_BIOKEY_3_On E_BIOKEY_30_On
## A1BG 6.7142363 6.347347 6.217134 6.5135803
## A1BG-AS1 1.6902329 3.007808 2.647809 3.0530818
## A2M 8.2976905 5.230938 7.024702 7.3640544
## A2M-AS1 0.3200853 3.007808 1.031557 0.5543158
## A4GALT 0.1924999 6.067528 4.242843 1.9288582
## AAAS 3.7705896 3.007808 3.753301 4.6202219
## T_BIOKEY_31_On E_BIOKEY_4_On E_BIOKEY_5_On E_BIOKEY_6_On E_BIOKEY_7_On
## A1BG 6.4391380 5.317964 6.144400 5.6636118 7.080815
## A1BG-AS1 2.4980394 2.751499 1.069224 3.1347585 2.931139
## A2M 7.3432528 4.655921 6.077872 7.9495697 6.311168
## A2M-AS1 0.3779867 1.631032 1.069224 -0.5050988 2.213239
## A4GALT 4.5948779 -3.979205 1.069224 4.4577645 4.797048
## AAAS 3.9582696 4.231757 5.273033 5.7455365 4.564079
## T_BIOKEY_8_On T_BIOKEY_9_On
## A1BG 5.6358750 6.613882
## A1BG-AS1 2.7856012 3.072074
## A2M 3.8636246 7.829947
## A2M-AS1 2.4442290 -1.842228
## A4GALT 0.3442736 1.727214
## AAAS 4.3373941 4.530149
expDatpre = expDatpre[,-c(13,16,18,22,29)]
expDaton = expDaton[,-c(13,16,18,22,29)]
# From the expDat object I created expDatpre and expDaton objects, which just contain expression data for the samples' pre-treatment or on-treatment tumours individually.
data(pam50.robust)
expAnnot = matrix(rownames(expDat), ncol=1)
colnames(expAnnot) = "Gene.Symbol"
expAnn = AnnotationDbi::select(org.Hs.eg.db, keys = expAnnot, keytype = "SYMBOL", columns = "ENTREZID")
mt = match(rownames(expDat), expAnn$SYMBOL)
expAnnx = expAnn[mt,]
colnames(expAnnx) = c("Gene.Symbol", "EntrezGene.ID")
expDatpreScale = t(scale(t(expDatpre)))
expDatpreScale[expDatpreScale < -3] = -3
expDatpreScale[expDatpreScale > 3] = 3
PAM50pre = molecular.subtyping(sbt.model = "pam50", data= t(expDatpre),
annot=expAnnx, do.mapping=FALSE)
par(mar=c(7,5,2,2))
table(PAM50pre$subtype)
##
## Basal Her2 LumB LumA Normal
## 9 3 5 6 3
boxplot(expDatpre["HIST1H2AI",] ~ PAM50pre$subtype, las=3, ylab = "Relative expression ofHIST1H2AI gene", xlab = NULL, cex=0, cex.lab = 1.2, cex.axis=1.2)
mtext('BC subtype', 1, 5, cex = 1.2)
PAM50on = molecular.subtyping(sbt.model = "pam50", data= t(expDaton),
annot=expAnnx, do.mapping=FALSE)
PAM50all = molecular.subtyping(sbt.model = "pam50", data= t(expDat),
annot=expAnnx, do.mapping=FALSE)
table(PAM50on$subtype)
##
## Basal Her2 LumB LumA Normal
## 10 6 4 4 2
boxplot(expDaton["ESR1",] ~ PAM50on$subtype, las=3)
dim(expDatpre)
## [1] 25288 26
table(PAM50on$subtype, PAM50pre$subtype)
##
## Basal Her2 LumB LumA Normal
## Basal 7 0 0 1 2
## Her2 1 3 1 0 1
## LumB 0 0 3 1 0
## LumA 0 0 1 3 0
## Normal 1 0 0 1 0
Above I performed PAM50 molecular subtyping on the expression data, both pre- and on- treatment. I then compared them, to see how many tumours seemingly ‘changed’ molecular subtype upon anti-PD1 treatment.
pid = sort(unique(brcaDat1@meta.data$patient_id))
pid = pid[-c(13,16,18,22,29)]
pid
## [1] "BIOKEY_1" "BIOKEY_10" "BIOKEY_11" "BIOKEY_12" "BIOKEY_13" "BIOKEY_14"
## [7] "BIOKEY_15" "BIOKEY_16" "BIOKEY_17" "BIOKEY_18" "BIOKEY_19" "BIOKEY_2"
## [13] "BIOKEY_21" "BIOKEY_22" "BIOKEY_24" "BIOKEY_26" "BIOKEY_27" "BIOKEY_28"
## [19] "BIOKEY_3" "BIOKEY_30" "BIOKEY_31" "BIOKEY_4" "BIOKEY_5" "BIOKEY_6"
## [25] "BIOKEY_8" "BIOKEY_9"
cbind(pid, colnames(expDatpre))
## pid
## [1,] "BIOKEY_1" "T_BIOKEY_1_Pre"
## [2,] "BIOKEY_10" "T_BIOKEY_10_Pre"
## [3,] "BIOKEY_11" "T_BIOKEY_11_Pre"
## [4,] "BIOKEY_12" "E_BIOKEY_12_Pre"
## [5,] "BIOKEY_13" "H_BIOKEY_13_Pre"
## [6,] "BIOKEY_14" "T_BIOKEY_14_Pre"
## [7,] "BIOKEY_15" "T_BIOKEY_15_Pre"
## [8,] "BIOKEY_16" "T_BIOKEY_16_Pre"
## [9,] "BIOKEY_17" "E_BIOKEY_17_Pre"
## [10,] "BIOKEY_18" "E_BIOKEY_18_Pre"
## [11,] "BIOKEY_19" "T_BIOKEY_19_Pre"
## [12,] "BIOKEY_2" "T_BIOKEY_2_Pre"
## [13,] "BIOKEY_21" "E_BIOKEY_21_Pre"
## [14,] "BIOKEY_22" "E_BIOKEY_22_Pre"
## [15,] "BIOKEY_24" "E_BIOKEY_24_Pre"
## [16,] "BIOKEY_26" "T_BIOKEY_26_Pre"
## [17,] "BIOKEY_27" "E_BIOKEY_27_Pre"
## [18,] "BIOKEY_28" "H_BIOKEY_28_Pre"
## [19,] "BIOKEY_3" "E_BIOKEY_3_Pre"
## [20,] "BIOKEY_30" "E_BIOKEY_30_Pre"
## [21,] "BIOKEY_31" "T_BIOKEY_31_Pre"
## [22,] "BIOKEY_4" "E_BIOKEY_4_Pre"
## [23,] "BIOKEY_5" "E_BIOKEY_5_Pre"
## [24,] "BIOKEY_6" "E_BIOKEY_6_Pre"
## [25,] "BIOKEY_8" "T_BIOKEY_8_Pre"
## [26,] "BIOKEY_9" "T_BIOKEY_9_Pre"
mt = match(pid, brcaDat1@meta.data$patient_id)
bc_type= brcaDat1@meta.data$BC_type[mt]
table(PAM50pre$subtype, bc_type)
## bc_type
## ER+ HER2+ TNBC
## Basal 0 2 7
## Her2 1 0 2
## LumB 5 0 0
## LumA 6 0 0
## Normal 0 0 3
I then compared the intrinsic molecular subtypes to the overall BC type,
I tried this code
table(PAM50all$subtype.crisp, brcaDat1$BC_type) but I
couldn’t get it to work because the arguments weren’t all of the same
length. This is because the brcaDat1 object has a row for each sequence,
whereas the PAM50 object only has a row for each patient. I should ask
Mik about this!
dim(expDat)
## [1] 25288 52
delta1 = as.matrix(expDat[,1] - expDat[,2])
delta2 = as.matrix(expDat[,23] - expDat[,24])
delta3 = as.matrix(expDat[,37] - expDat[,38])
delta4 = as.matrix(expDat[,43] - expDat[,44])
delta5 = as.matrix(expDat[,45] - expDat[,46])
delta6 = as.matrix(expDat[,47] - expDat[,48])
delta8 = as.matrix(expDat[,49] - expDat[,50])
delta9 = as.matrix(expDat[,51] - expDat[,52])
delta10 = as.matrix(expDat[,3] - expDat[,4])
delta11 = as.matrix(expDat[,5] - expDat[,6])
delta12 = as.matrix(expDat[,7] - expDat[,8])
delta13 = as.matrix(expDat[,9] - expDat[,10])
delta14 = as.matrix(expDat[,11] - expDat[,12])
delta15 = as.matrix(expDat[,13] - expDat[,14])
delta16 = as.matrix(expDat[,15] - expDat[,16])
delta17 = as.matrix(expDat[,17] - expDat[,18])
delta18 = as.matrix(expDat[,19] - expDat[,20])
delta19 = as.matrix(expDat[,21] - expDat[,22])
delta21 = as.matrix(expDat[,25] - expDat[,26])
delta22 = as.matrix(expDat[,27] - expDat[,28])
delta24 = as.matrix(expDat[,29] - expDat[,30])
delta26 = as.matrix(expDat[,31] - expDat[,32])
delta27 = as.matrix(expDat[,33] - expDat[,34])
delta28 = as.matrix(expDat[,35] - expDat[,36])
delta30 = as.matrix(expDat[,39] - expDat[,40])
delta31 = as.matrix(expDat[,41] - expDat[,42])
dim(expDat)
## [1] 25288 52
deltag = cbind(delta1, delta10, delta11, delta12, delta13, delta14, delta15, delta16, delta17, delta18, delta19, delta2, delta21, delta22, delta24, delta26, delta27, delta28, delta3, delta30, delta31, delta4, delta5, delta6, delta8, delta9)
colnames(deltag) = c("T_BIOKEY1delta", "T_BIOKEY10delta", "T_BIOKEY11delta", "E_BIOKEY12delta", "H_BIOKEY13delta", "T_BIOKEY14delta", "T_BIOKEY15delta", "T_BIOKEY16delta", "E_BIOKEY17delta", "E_BIOKEY18delta", "T_BIOKEY19delta", "T_BIOKEY2delta", "E_BIOKEY21delta", "E_BIOKEY22delta", "E_BIOKEY24delta", "T_BIOKEY26delta", "E_BIOKEY27delta", "H_BIOKEY28delta", "E_BIOKEY3delta", "E_BIOKEY30delta", "T_BIOKEY31delta", "E_BIOKEY4delta", "E_BIOKEY5delta", "E_BIOKEY6delta", "T_BIOKEY8delta", "T_BIOKEY9delta")
deltaSD = apply(deltag, 1, sd)
deltaMean = apply(deltag, 1, mean)
plot(deltaMean, deltaSD, pch='.')
keep2000 = order(deltaSD, decreasing=TRUE)[1:2000]
head(keep2000)
## [1] 7044 6475 7008 18441 7042 7083
delta2000 = deltag[keep2000,]
deltag[1:5,1:5]
## T_BIOKEY1delta T_BIOKEY10delta T_BIOKEY11delta E_BIOKEY12delta
## A1BG 0.0836532252 0.003545888 0.4522258 -0.6815619
## A1BG-AS1 0.0008813205 -0.043788828 1.3636316 0.2728971
## A2M 3.5470728193 -0.812956995 0.2421816 -1.3257193
## A2M-AS1 -1.2007507119 0.628368657 0.1083350 0.3717461
## A4GALT 3.3390989679 -0.933972749 0.7811581 -1.3732026
## H_BIOKEY13delta
## A1BG -0.3951139
## A1BG-AS1 -1.4877207
## A2M 0.7511477
## A2M-AS1 -0.7835323
## A4GALT 0.1514798
dim(deltag)
## [1] 25288 26
In this section of code I made my deltag object, which
consists of the pre-treamtent expression values being subtracted from
the on-treatment expression values, for every gene of every tumour. A
positive value for a gene’s expression in this object thus means that
that gene increased in expression upon treatment, while a negative value
means that that gene decreased in expression upon treatment, for that
tumour.
I also made a delta2000 object, where I sorted these genes by their standard deviation, and the delta2000 object only includes the top 2000 genes with the highest SDs.
dim(delta2000)
## [1] 2000 26
hc = hclust(dist(t(delta2000)))
plot(hc, hang = -1)
delta2000_scale = t(apply(delta2000, 1, scale, scale=TRUE))
delta2000_scale[delta2000_scale < -3] = -3
delta2000_scale[delta2000_scale > 3] = 3
table(brcaDat1$patient_id, brcaDat1$BC_type)
##
## ER+ HER2+ TNBC
## BIOKEY_1 0 0 8103
## BIOKEY_10 0 0 9192
## BIOKEY_11 0 0 3966
## BIOKEY_12 6534 0 0
## BIOKEY_13 0 3712 0
## BIOKEY_14 0 0 3409
## BIOKEY_15 0 0 6874
## BIOKEY_16 0 0 8706
## BIOKEY_17 3104 0 0
## BIOKEY_18 3660 0 0
## BIOKEY_19 0 0 4926
## BIOKEY_2 0 0 4679
## BIOKEY_20 2164 0 0
## BIOKEY_21 3280 0 0
## BIOKEY_22 2295 0 0
## BIOKEY_23 0 2764 0
## BIOKEY_24 2363 0 0
## BIOKEY_25 0 0 613
## BIOKEY_26 0 0 5711
## BIOKEY_27 3491 0 0
## BIOKEY_28 0 2514 0
## BIOKEY_29 857 0 0
## BIOKEY_3 3780 0 0
## BIOKEY_30 3030 0 0
## BIOKEY_31 0 0 4581
## BIOKEY_4 6480 0 0
## BIOKEY_5 2711 0 0
## BIOKEY_6 3640 0 0
## BIOKEY_7 1640 0 0
## BIOKEY_8 0 0 1405
## BIOKEY_9 0 0 2809
## I don't think I need these:
## Don't need those!
subtype_cc = c("black", "purple", "lightblue", "blue", "green")[as.numeric(as.factor(PAM50pre$subtype))]
bctype_cc = c("blue", "purple", "black")[as.numeric(as.factor(bc_type))]
table(bc_type, bctype_cc)
## bctype_cc
## bc_type black blue purple
## ER+ 0 12 0
## HER2+ 0 0 2
## TNBC 12 0 0
The unsupervised clustering is shown here. I then also scaled the delta2000 object so that all gene expression values are scaled between -3 and +3. I then determine the quantiles of the delta2000 object, determine colours for the molecular subtypes of cancer, as well as the BC type. A heatmap is shown, where the colours are based off their quantiles, and I created a boxplot of this too, as well as a density plot.
what actually is drcg_cols? Ask Mik.
colnames(delta2000_scale) = colnames(delta2000)
deltaclust = hclust(as.dist(1-cor(delta2000_scale)))
deltad = as.dendrogram(deltaclust)
cutd = cutree((deltad),2)
table(cutd)
## cutd
## 1 2
## 13 13
cutd[1:10]
## T_BIOKEY1delta T_BIOKEY10delta T_BIOKEY11delta E_BIOKEY12delta H_BIOKEY13delta
## 1 2 2 2 1
## T_BIOKEY14delta T_BIOKEY15delta T_BIOKEY16delta E_BIOKEY17delta E_BIOKEY18delta
## 1 2 1 2 2
par(mar = c(8,3,2,2))
plot(color_branches(deltad,2),leaflab="p", cex = 1, axes=F)
here I performed the unsupervised clustering of the delta2000 object to create the infamous red/green cluster diagram.
04/08/2022: changed the deltaclust object from
deltaclust = hclust(as.dist(1-cor(delta2000))) to
deltaclust = hclust(as.dist(1-cor(delta2000_scale)))
group = rep( c( "PRE", "ON"), c(26,26))
designdeltag = model.matrix(~rep(1, ncol(delta2000)) - 1)
deltaglim = lmFit(delta2000, designdeltag)
deltaglim = eBayes(deltaglim)
ttdeltaglim = topTable(deltaglim, coef=1, adjust.method = "BH", n=nrow(delta2000))
ttdeltagsig=ttdeltaglim[ttdeltaglim$adj.P.Val<0.05,]
ttdeltagsig[1:10,]
## logFC AveExpr t P.Value adj.P.Val B
## HBA2 -4.063105 -4.063105 -5.520202 1.801463e-07 0.0002072791 6.7369872
## TUBA3D 3.246357 3.246357 5.489632 2.072791e-07 0.0002072791 6.6124548
## HBA1 -3.288387 -3.288387 -4.816309 4.052127e-06 0.0027014183 3.9785527
## HBB -3.584036 -3.584036 -4.480017 1.633443e-05 0.0081672166 2.7486406
## HBM -2.410997 -2.410997 -4.091959 7.512812e-05 0.0300512468 1.4086642
## IGLV3-21 -2.967492 -2.967492 -3.948976 1.287492e-04 0.0372666557 0.9377503
## IGHV4-28 -2.599833 -2.599833 -3.945483 1.304333e-04 0.0372666557 0.9264059
## CSF2 -2.262941 -2.262941 -3.841424 1.914034e-04 0.0478508408 0.5919855
## IGHV3-72 -2.819188 -2.819188 -3.797471 2.245830e-04 0.0499073320 0.4528164
## NA NA NA NA NA NA NA
rownames(ttdeltagsig)
## [1] "HBA2" "TUBA3D" "HBA1" "HBB" "HBM" "IGLV3-21" "IGHV4-28"
## [8] "CSF2" "IGHV3-72"
expDatdeltagsig = expDat[rownames(ttdeltagsig),]
heatmap.2(expDatdeltagsig, trace='none', scale='none', col=bluered, Rowv=T, Colv=T, dendrogram = 'both')
deltagsig = deltag[rownames(ttdeltagsig),]
heatmap.2(deltagsig, trace='none', scale='none', col=bluered, Rowv=T, Colv=T, dendrogram = 'both')
Deltag analysis performs limma analysis on all delta genes (on-pre), and then filters this for ones with an adjusted p-value over 0.05. To explore the original expression values better, I have made a heatmap of the raw expression values for these filtered genes.
sampleData = brcaDat1@meta.data[,-c(1:4,9:15),] %>% unique()
head(sampleData)
sampleData
sampleData = sampleData[-c(11:12,33:34,37:38,41:42,51:52),]
cbind(colnames(expDat), sampleData)
sampleDataAll = dplyr::arrange(sampleData, patient_id, desc(timepoint))
cbind(colnames(expDat), sampleDataAll)
rownames(sampleDataAll) = paste0(sampleDataAll$patient_id, "_", sampleDataAll$timepoint)
sampleData3 = brcaDat1@meta.data[,-c(1:4,6,9:15),] %>% unique()
sampleData3 = sampleData3[-c(6,17,19,21,26),]
rownames(sampleData3) = paste0(sampleData3$patient_id, "_", "Pre")
cbind(colnames(expDatpre), sampleData3)
sampleDataPatient = dplyr::arrange(sampleData3, patient_id)
cbind(colnames(expDatpre), sampleDataPatient)
cbind(colnames(delta2000_scale), sampleDataPatient)
Here I created the sampleDataPatient object; a matrix
showing the patient ID, expansion, and BC type.
pidx = sort(sampleData3$patient_id)
cbind(pidx, colnames(expDatpre))
## pidx
## [1,] "BIOKEY_1" "T_BIOKEY_1_Pre"
## [2,] "BIOKEY_10" "T_BIOKEY_10_Pre"
## [3,] "BIOKEY_11" "T_BIOKEY_11_Pre"
## [4,] "BIOKEY_12" "E_BIOKEY_12_Pre"
## [5,] "BIOKEY_13" "H_BIOKEY_13_Pre"
## [6,] "BIOKEY_14" "T_BIOKEY_14_Pre"
## [7,] "BIOKEY_15" "T_BIOKEY_15_Pre"
## [8,] "BIOKEY_16" "T_BIOKEY_16_Pre"
## [9,] "BIOKEY_17" "E_BIOKEY_17_Pre"
## [10,] "BIOKEY_18" "E_BIOKEY_18_Pre"
## [11,] "BIOKEY_19" "T_BIOKEY_19_Pre"
## [12,] "BIOKEY_2" "T_BIOKEY_2_Pre"
## [13,] "BIOKEY_21" "E_BIOKEY_21_Pre"
## [14,] "BIOKEY_22" "E_BIOKEY_22_Pre"
## [15,] "BIOKEY_24" "E_BIOKEY_24_Pre"
## [16,] "BIOKEY_26" "T_BIOKEY_26_Pre"
## [17,] "BIOKEY_27" "E_BIOKEY_27_Pre"
## [18,] "BIOKEY_28" "H_BIOKEY_28_Pre"
## [19,] "BIOKEY_3" "E_BIOKEY_3_Pre"
## [20,] "BIOKEY_30" "E_BIOKEY_30_Pre"
## [21,] "BIOKEY_31" "T_BIOKEY_31_Pre"
## [22,] "BIOKEY_4" "E_BIOKEY_4_Pre"
## [23,] "BIOKEY_5" "E_BIOKEY_5_Pre"
## [24,] "BIOKEY_6" "E_BIOKEY_6_Pre"
## [25,] "BIOKEY_8" "T_BIOKEY_8_Pre"
## [26,] "BIOKEY_9" "T_BIOKEY_9_Pre"
mt = match(pidx, sampleDataPatient$patient_id)
bc_type= sampleDataPatient$BC_type[mt]
sampleData3$BC_type[mt]
## [1] "HER2+" "TNBC" "TNBC" "TNBC" "TNBC" "TNBC" "HER2+" "ER+" "TNBC"
## [10] "TNBC" "ER+" "ER+" "ER+" "TNBC" "TNBC" "ER+" "ER+" "ER+"
## [19] "TNBC" "ER+" "TNBC" "TNBC" "ER+" "ER+" "ER+" "ER+"
expan = unique(brcaDat1@meta.data$expansion)
#testline haha
expan_cc = c("pink", "purple", "green")[sampleDataPatient$expansion]
expan_cc[1:10]
## [1] NA NA NA NA NA NA NA NA NA NA
expan_cc = as.matrix(expan_cc)
expant = as.numeric(sampleDataPatient$expansion)
## Warning: NAs introduced by coercion
table(rownames(sampleData3), sampleData3$expansion)
##
## E n/a NE
## BIOKEY_1_Pre 0 1 0
## BIOKEY_10_Pre 1 0 0
## BIOKEY_11_Pre 1 0 0
## BIOKEY_12_Pre 1 0 0
## BIOKEY_13_Pre 0 1 0
## BIOKEY_14_Pre 0 0 1
## BIOKEY_15_Pre 1 0 0
## BIOKEY_16_Pre 1 0 0
## BIOKEY_17_Pre 0 0 1
## BIOKEY_18_Pre 1 0 0
## BIOKEY_19_Pre 0 0 1
## BIOKEY_2_Pre 0 0 1
## BIOKEY_21_Pre 0 0 1
## BIOKEY_22_Pre 0 0 1
## BIOKEY_24_Pre 0 0 1
## BIOKEY_26_Pre 0 0 1
## BIOKEY_27_Pre 0 0 1
## BIOKEY_28_Pre 1 0 0
## BIOKEY_3_Pre 0 0 1
## BIOKEY_30_Pre 0 0 1
## BIOKEY_31_Pre 1 0 0
## BIOKEY_4_Pre 0 0 1
## BIOKEY_5_Pre 1 0 0
## BIOKEY_6_Pre 0 0 1
## BIOKEY_8_Pre 0 0 1
## BIOKEY_9_Pre 0 0 1
subtype_cc = c("black", "purple", "lightblue", "blue", "green")[as.numeric(as.factor(PAM50pre$subtype))]
bctype_cc = c("blue", "purple", "black")[as.numeric(as.factor(sampleDataPatient$BC_type))]
sampleDataPatient$BC_type[1:10]
## [1] "TNBC" "TNBC" "TNBC" "ER+" "HER2+" "TNBC" "TNBC" "TNBC" "ER+"
## [10] "ER+"
clustergroups_cc = c("pink", "purple")[cutd]
matrix_cc = rbind(subtype_cc, clustergroups_cc, bctype_cc)
rownames(matrix_cc) = c("Intrinsic Subtypes", "Cluster Groups", "BC Type")
matrix_cc = t(matrix_cc)
dim(matrix_cc)
dim(delta2000_scale)
rownames(matrix_cc) = colnames(delta2000_scale)
matrix_ccnona = matrix_cc[-c(1,5),]
dim(matrix_ccnona)
heatmap.3(delta2000_scale, trace='none', scale='none', col=bluered(50),
ColSideColors = matrix_cc, margins=c(7,4),labCol = colnames(delta2000_scale),
distfun = function(x) as.dist(1-cor(t(x))))
table(brcaDat1@meta.data$BC_type, brcaDat1@meta.data$patient_id)
##
## BIOKEY_1 BIOKEY_10 BIOKEY_11 BIOKEY_12 BIOKEY_13 BIOKEY_14 BIOKEY_15
## ER+ 0 0 0 6534 0 0 0
## HER2+ 0 0 0 0 3712 0 0
## TNBC 8103 9192 3966 0 0 3409 6874
##
## BIOKEY_16 BIOKEY_17 BIOKEY_18 BIOKEY_19 BIOKEY_2 BIOKEY_20 BIOKEY_21
## ER+ 0 3104 3660 0 0 2164 3280
## HER2+ 0 0 0 0 0 0 0
## TNBC 8706 0 0 4926 4679 0 0
##
## BIOKEY_22 BIOKEY_23 BIOKEY_24 BIOKEY_25 BIOKEY_26 BIOKEY_27 BIOKEY_28
## ER+ 2295 0 2363 0 0 3491 0
## HER2+ 0 2764 0 0 0 0 2514
## TNBC 0 0 0 613 5711 0 0
##
## BIOKEY_29 BIOKEY_3 BIOKEY_30 BIOKEY_31 BIOKEY_4 BIOKEY_5 BIOKEY_6
## ER+ 857 3780 3030 0 6480 2711 3640
## HER2+ 0 0 0 0 0 0 0
## TNBC 0 0 0 4581 0 0 0
##
## BIOKEY_7 BIOKEY_8 BIOKEY_9
## ER+ 1640 0 0
## HER2+ 0 0 0
## TNBC 0 1405 2809
In this section I created a heatmap.3, showing, for all samples, their cancer type, intrinsic molecular subtype, and whether they expanded on treatment or not. (ask mik to help with expan)
design = model.matrix(~rep(1, ncol(deltag)) - 1)
design[1:10]
## [1] 1 1 1 1 1 1 1 1 1 1
designcutd = model.matrix(~cutd)
designcutd[1:10]
## [1] 1 1 1 1 1 1 1 1 1 1
sampleDataPatientnona = sampleDataPatient[c(2:4, 6:26),]
delta2000nona = delta2000[, c(2:4, 6:26)]
expan = sampleDataPatientnona$expansion
expan = as.matrix(expan)
expanx = ifelse(sampleDataPatientnona$expansion=="E",1,2)
expanx = as.matrix(expanx)
expanx[1:10]
## [1] 1 1 1 2 1 1 2 1 2 2
rownames(expanx) = rownames(sampleDataPatientnona)
designexpanrownames = sort(rownames(expanx))
designexpan = model.matrix(~expanx)
designexpan[1:10]
## [1] 1 1 1 1 1 1 1 1 1 1
rownames(designexpan) = designexpanrownames
colnames(designexpan) = c("Intercept", "NEorNAis2")
expanlim = lmFit(delta2000nona, designexpan)
expanlim = eBayes(expanlim)
ttexpan = topTable(expanlim, coef=2, adjust.method = "BH", n=nrow(delta2000))
ttexpan[1:10,]
## logFC AveExpr t P.Value adj.P.Val B
## AL031733.2 -4.325872 1.35067013 -4.133936 0.0001052666 0.1508778 0.813653649
## IGLV3-19 -5.759697 -1.25978520 -3.922705 0.0002154659 0.1508778 0.258196309
## TRBV15 -4.873191 -1.19771268 -3.852849 0.0002719043 0.1508778 0.077922814
## TRBV11-3 -4.872315 0.66476707 -3.821357 0.0003017555 0.1508778 -0.002763729
## IGHV1-46 -5.026703 -1.17420663 -3.440615 0.0010243851 0.3316939 -0.947324970
## IGKV3D-20 -5.085056 -1.12172579 -3.408367 0.0011323679 0.3316939 -1.024532117
## TRAV8-1 -4.415424 -1.32769500 -3.387458 0.0012080421 0.3316939 -1.074343067
## EGR4 -3.803442 0.33185909 -3.333703 0.0014251233 0.3316939 -1.201493575
## AC104971.3 -3.779747 0.06468157 -3.318561 0.0014926226 0.3316939 -1.237072774
## CR2 -3.844174 1.01826237 -3.215965 0.0020356824 0.3750220 -1.475315207
ttexpan200 = rownames(ttexpan[1:200,])
# heatmap.3(ttexpan, trace='none', scale='none', col=bluered(50), ColSideColors = matrix_ccnona, margins=c(7,4),labCol = #colnames(delta2000_scalenona),distfun = function(x) as.dist(1-cor(t(x))))
dim(designexpan)
## [1] 24 2
matrix_cc
## Intrinsic Subtypes Cluster Groups BC Type
## T_BIOKEY1delta "black" "pink" "black"
## T_BIOKEY10delta "purple" "purple" "black"
## T_BIOKEY11delta "purple" "purple" "black"
## E_BIOKEY12delta "lightblue" "purple" "blue"
## H_BIOKEY13delta "black" "pink" "purple"
## T_BIOKEY14delta "green" "pink" "black"
## T_BIOKEY15delta "black" "purple" "black"
## T_BIOKEY16delta "black" "pink" "black"
## E_BIOKEY17delta "lightblue" "purple" "blue"
## E_BIOKEY18delta "blue" "purple" "blue"
## T_BIOKEY19delta "green" "pink" "black"
## T_BIOKEY2delta "black" "purple" "black"
## E_BIOKEY21delta "lightblue" "pink" "blue"
## E_BIOKEY22delta "blue" "pink" "blue"
## E_BIOKEY24delta "blue" "pink" "blue"
## T_BIOKEY26delta "black" "pink" "black"
## E_BIOKEY27delta "blue" "pink" "blue"
## H_BIOKEY28delta "black" "purple" "purple"
## E_BIOKEY3delta "purple" "pink" "blue"
## E_BIOKEY30delta "blue" "purple" "blue"
## T_BIOKEY31delta "black" "purple" "black"
## E_BIOKEY4delta "lightblue" "purple" "blue"
## E_BIOKEY5delta "lightblue" "pink" "blue"
## E_BIOKEY6delta "blue" "purple" "blue"
## T_BIOKEY8delta "green" "purple" "black"
## T_BIOKEY9delta "black" "pink" "black"
dim(delta2000_scale)
## [1] 2000 26
delta2000_scalenona = delta2000_scale[,-c(1,5)]
In this differential expression analysis I am comparing, from all samples, those that did have TCR expansion upon treatment vs those that did not have TCR expansion upon treatment.
library(reactome.db)
reactome()
## Quality control information for reactome:
##
##
## This package has the following mappings:
##
## reactomeEXTID2PATHID has 52575 mapped keys (of 52575 keys)
## reactomeGO2REACTOMEID has 1435 mapped keys (of 1435 keys)
## reactomePATHID2EXTID has 16760 mapped keys (of 16760 keys)
## reactomePATHID2NAME has 21323 mapped keys (of 21323 keys)
## reactomePATHNAME2ID has 21292 mapped keys (of 21292 keys)
## reactomeREACTOMEID2GO has 13963 mapped keys (of 13963 keys)
##
##
## Additional Information about this package:
##
## DB schema: REACTOME_DB
## DB schema version: 79
lapply( as.list(reactomePATHID2EXTID)[1:2], head )
## $`R-HSA-109582`
## [1] "1" "10019" "10112" "10125" "10125" "1017"
##
## $`R-HSA-114608`
## [1] "1" "10184" "10257" "10447" "10487" "10490"
as.list(reactomePATHID2NAME)[1:2]
## $`R-BTA-73843`
## [1] "1-diphosphate: 5-Phosphoribose"
##
## $`R-BTA-1971475`
## [1] "Bos taurus\r: A tetrasaccharide linker sequence is required for GAG synthesis"
library(org.Hs.eg.db)
ann = AnnotationDbi::select(org.Hs.eg.db, keys=ttexpan200,
keytype="SYMBOL", columns="ENTREZID")
library(ReactomePA)
sigEntrez = ann$ENTREZID
sigEntrez[1:10]
## [1] NA "28797" "28572" "28580" "28465" "28874" "28685" "1961" NA
## [10] "1380"
rPAoverrep = enrichPathway(gene=sigEntrez, organism = "human", pvalueCutoff=0.1, readable=T)
rpasig = rPAoverrep[rPAoverrep@result$p.adjust<0.05,]
rpasig %>% as.data.frame() %>% knitr::kable()
| ID | Description | GeneRatio | BgRatio | pvalue | p.adjust | qvalue | geneID | Count |
|---|
rpasigid = rpasig$geneID
rpasigidcat = cat(rpasigid, sep="\n")
sDPernona = sampleDataPatientnona[c(3,7,8,11:13,15,17,18,20:22),]
delta2000ernona = delta2000nona[, c(3,7,8,11:13,15,17,18,20:22)]
expanernona = sDPernona$expansion
expanernona = as.matrix(expanernona)
expanxernona = ifelse(sDPernona$expansion=="E",1,2)
expanxernona = as.matrix(expanxernona)
rownames(expanxernona) = rownames(sDPernona)
designexpanrownamesernona = sort(rownames(expanxernona))
designexpanernona = model.matrix(~expanxernona)
rownames(designexpanernona) = designexpanrownamesernona
colnames(designexpanernona) = c("Intercept", "NEis2")
expanlimernona = lmFit(delta2000ernona, designexpanernona)
expanlimernona = eBayes(expanlimernona)
ttexpanernona = topTable(expanlimernona, coef=2, adjust.method = "BH", n=nrow(delta2000ernona))
ttexpanernona[1:10,]
## logFC AveExpr t P.Value adj.P.Val B
## IGLV3-19 -9.172286 -2.51432632 -4.291132 0.0002653457 0.2885612 -0.3049814
## IGHV2-26 -6.344066 0.66816235 -4.230732 0.0003087585 0.2885612 -0.4027982
## HBB 10.298148 -3.13445924 4.083010 0.0004470476 0.2885612 -0.6429953
## AC129492.1 -5.980862 -0.40993195 -3.889357 0.0007250294 0.2885612 -0.9593849
## IGKV1-6 -8.408806 -0.05654878 -3.866863 0.0007667950 0.2885612 -0.9962075
## IGLV2-8 -9.337660 -1.62935994 -3.818089 0.0008656835 0.2885612 -1.0760764
## AC079341.1 -5.242461 0.92919376 -3.712145 0.0011258311 0.3216660 -1.2496237
## RN7SL832P -5.463674 -0.77891718 -3.344216 0.0027756161 0.6939040 -1.8504523
## CYP2F1 -4.591541 0.88294177 -3.263862 0.0033711737 0.7491497 -1.9806821
## ACTA1 4.941371 0.66435753 3.140075 0.0045375758 0.9075152 -2.1801383
dim(ttexpanernona)
## [1] 2000 6
volcanoplot(expanlimernona, coef = 2, xlab = "log2FC", ylab = "-log10P_value")
par(mfrow=c(3,4))
for(i in rpasigid) boxplot( expDat[i,] ~ rep(c("O", "P"), 26) * rep(cutd,rep(2,length(cutd))),
main=i)
for(i in rpasigid) boxplot(deltag[i,] ~ rep(c("Clustered groups"), 26) * sampleDataPatient$BC_type)
boxplot(deltag["IGLV2-8",] ~ rep(c("O"), 26) * sampleDataPatient$BC_type)
This analysis performs a differential expression analysis of the ER+
samples, comparing those that had TCR expansion to those that did not
have TCR expansion. I then also started to investigate this IGLV2-8
gene, as well as beginning investigation of the rpasigid
object, which contains the geneIDs of the statistically significantly
overrepresented genes from GSEA.
library(ggplot2)
DimPlot(brcaDat1, reduction = "tsne", label = F, label.box = F, label.size = 4, repel = F, group.by = "cellType", raster = F, split.by = "BC_type")
DimPlot(brcaDat1, reduction = "tsne", label = F, label.box = F, label.size = 4, repel = F, group.by = "CHETAH", raster = F, split.by = "BC_type")
table(brcaDat1@meta.data$CHETAH)
##
## CAF CD4 T cell CD8 T cell Dendritic Endothelial
## 8780 894 817 15 725
## Macrophage Mast Myofibroblast NK Node1
## 113 1 174 158 9526
## Node10 Node2 Node3 Node4 Node5
## 1073 1835 5525 714 3534
## Node6 Node7 Node8 Node9 Plasma
## 616 14 37 1886 235
## reg. T cell Unassigned
## 513 85808
prop.table(table(brcaDat1@meta.data$CHETAH),1)
##
## CAF CD4 T cell CD8 T cell Dendritic Endothelial
## 1 1 1 1 1
## Macrophage Mast Myofibroblast NK Node1
## 1 1 1 1 1
## Node10 Node2 Node3 Node4 Node5
## 1 1 1 1 1
## Node6 Node7 Node8 Node9 Plasma
## 1 1 1 1 1
## reg. T cell Unassigned
## 1 1
FeaturePlot(brcaDat1, features = c("PDCD1"), reduction = 'tsne', max.cutoff = 3, ncol = 3, split.by = "BC_type")
kp = brcaDat1@meta.data[["cellType"]]=="Cancer_cell"
FeaturePlot(brcaDat1, features = c("PDCD1"), reduction = 'tsne', max.cutoff = 3, ncol = 3, split.by = "BC_type", cells = which(kp))
FeaturePlot(brcaDat1, features = c("IQSEC2"),
reduction = 'tsne', max.cutoff = 3, ncol = 3, split.by = "BC_type")
fn14 = brcaDat1@assays$RNA["CD163L1",] %>% as.numeric()
cell_type_tab = table(brcaDat1@meta.data[["cellType"]], fn14>0)
prop = round(prop.table(cell_type_tab, 1)[,2],3)
knitr::kable(cbind(cell_type_tab, prop), col.names = c("FN14=0", "FN14>0", "Proportion"))
| FN14=0 | FN14>0 | Proportion | |
|---|---|---|---|
| B_cell | 10033 | 15 | 0.001 |
| Cancer_cell | 29234 | 838 | 0.028 |
| Endothelial_cell | 4256 | 40 | 0.009 |
| Fibroblast | 17746 | 225 | 0.013 |
| Mast_cell | 721 | 0 | 0.000 |
| Myeloid_cell | 10737 | 320 | 0.029 |
| pDC | 705 | 0 | 0.000 |
| T_cell | 48074 | 49 | 0.001 |
data.frame(fn14,
cellType = brcaDat1@meta.data[["cellType"]],
bcType = brcaDat1@meta.data[["BC_type"]],
trt = brcaDat1@meta.data[["timepoint"]],
patID = brcaDat1@meta.data[["patient_id"]],
exp = brcaDat1@meta.data[["expansion"]]) %>%
dplyr::filter(fn14>0, cellType == "Cancer_cell", bcType!="HER2+") %>%
ggplot(., aes(x=patID, y=fn14)) +
geom_boxplot(outlier.alpha=0) +
geom_jitter(size=0.1) +
facet_wrap(~exp, scales = 'free_x') + xlab("") +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) +
ggtitle("FN14 in fibroblasts only")
data.frame(fn14,
cellType = brcaDat1@meta.data[["cellType"]],
bcType = brcaDat1@meta.data[["BC_type"]],
trt = brcaDat1@meta.data[["timepoint"]],
patID = brcaDat1@meta.data[["patient_id"]],
exp = brcaDat1@meta.data[["expansion"]]) %>%
dplyr::filter(fn14>0, cellType == "Cancer_cell", bcType=="TNBC", exp!="n/a") %>%
ggplot(., aes(x=patID, y=fn14, fill=trt)) +
geom_boxplot(outlier.alpha=0) +
geom_point(size=0.1, position=position_jitterdodge(jitter.width=0.12)) +
facet_wrap(~exp, scales = 'free_x') + xlab("") +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))
data.frame(fn14,
cellType = brcaDat1@meta.data[["cellType"]],
bcType = brcaDat1@meta.data[["BC_type"]],
trt = brcaDat1@meta.data[["timepoint"]]) %>%
dplyr::filter(fn14>0) %>%
ggplot(., aes(x=cellType, y=fn14, fill=bcType), base_size=5) +
geom_boxplot(outlier.alpha=0) +
geom_point(size=0.1, position=position_jitterdodge(jitter.width=0.12)) + xlab("Cell Type") + ylab("CD163L1 expression")
This code chunk is sort of just a test, I’m just playing around with graphical presentations. I should explore this more to get some cool diagrams for my thesis!
sDPpreer = sampleDataPatient[c(4,9:10,13:15,17,19:20,22:24),]
expDatpreer = expDatpre[,c(4,9:10,13:15,17,19:20,22:24)]
expanpreer = sDPpreer$expansion
expanpreer = as.matrix(expanpreer)
expanpreer = ifelse(sDPpreer$expansion=="E",1,2)
expanpreer = as.matrix(expanpreer)
rownames(expanpreer) = rownames(sDPpreer)
designexpanrownamespreer = sort(rownames(expanpreer))
designexpanpreer = model.matrix(~expanpreer)
rownames(designexpanpreer) = designexpanrownamespreer
colnames(designexpanpreer) = c("Intercept", "NEis2")
expanlimpreer = lmFit(expDatpreer, designexpanpreer)
expanlimpreer = eBayes(expanlimpreer)
ttexpanpreer = topTable(expanlimpreer, coef=2, adjust.method = "BH", n=nrow(expDatpreer))
ttexpanpreer[1:10,]
## logFC AveExpr t P.Value adj.P.Val B
## CD163L1 5.582422 1.3117436 9.456627 3.469769e-07 0.008774353 -1.094997
## AC025283.2 4.320308 0.3651582 7.551229 4.221597e-06 0.053377866 -1.429583
## KRT17 6.866016 3.4697135 5.953387 4.827880e-05 0.354111657 -1.875140
## CDKL2 -3.767975 -1.8340165 -5.772432 6.503038e-05 0.354111657 -1.939019
## RPP40 3.292151 1.8815138 5.727983 7.001575e-05 0.354111657 -1.955201
## TACSTD2 3.156901 5.0175682 5.583690 8.915770e-05 0.375769987 -2.009105
## DAPL1 4.283263 1.3820517 5.224851 1.646792e-04 0.503400586 -2.152675
## AL731557.1 -3.486447 -1.9436841 -5.183956 1.768059e-04 0.503400586 -2.169935
## PRSS16 3.374282 1.3985308 5.165114 1.827039e-04 0.503400586 -2.177950
## CHRNA7 -3.397043 -1.5587374 -5.115991 1.990670e-04 0.503400586 -2.199038
ttexpanpreer[1:10,]
## logFC AveExpr t P.Value adj.P.Val B
## CD163L1 5.582422 1.3117436 9.456627 3.469769e-07 0.008774353 -1.094997
## AC025283.2 4.320308 0.3651582 7.551229 4.221597e-06 0.053377866 -1.429583
## KRT17 6.866016 3.4697135 5.953387 4.827880e-05 0.354111657 -1.875140
## CDKL2 -3.767975 -1.8340165 -5.772432 6.503038e-05 0.354111657 -1.939019
## RPP40 3.292151 1.8815138 5.727983 7.001575e-05 0.354111657 -1.955201
## TACSTD2 3.156901 5.0175682 5.583690 8.915770e-05 0.375769987 -2.009105
## DAPL1 4.283263 1.3820517 5.224851 1.646792e-04 0.503400586 -2.152675
## AL731557.1 -3.486447 -1.9436841 -5.183956 1.768059e-04 0.503400586 -2.169935
## PRSS16 3.374282 1.3985308 5.165114 1.827039e-04 0.503400586 -2.177950
## CHRNA7 -3.397043 -1.5587374 -5.115991 1.990670e-04 0.503400586 -2.199038
ttexpanpreer[5793,]
## logFC AveExpr t P.Value adj.P.Val B
## JUP 0.7290147 5.686567 1.063748 0.3068324 0.9629482 -4.648902
ttexpanpreersig=ttexpanpreer[ttexpanpreer$adj.P.Val<0.05,]
ttexpanpreersig[1:10,]
## logFC AveExpr t P.Value adj.P.Val B
## CD163L1 5.582422 1.311744 9.456627 3.469769e-07 0.008774353 -1.094997
## NA NA NA NA NA NA NA
## NA.1 NA NA NA NA NA NA
## NA.2 NA NA NA NA NA NA
## NA.3 NA NA NA NA NA NA
## NA.4 NA NA NA NA NA NA
## NA.5 NA NA NA NA NA NA
## NA.6 NA NA NA NA NA NA
## NA.7 NA NA NA NA NA NA
## NA.8 NA NA NA NA NA NA
rownames(ttexpanpreersig)
## [1] "CD163L1"
##
match("IGLV2-8", rownames(ttexpanpreer))
## [1] 3969
match("PTN", rownames(ttexpanpreer))
## [1] 786
match("IQSEC2", rownames(ttexpanpreer))
## [1] 130
ttexpanpreer[130,]
## logFC AveExpr t P.Value adj.P.Val B
## IQSEC2 3.54023 2.567533 3.337024 0.005362782 0.9629482 -3.16074
match("HIST1H2AI", rownames(ttexpanpreer))
## [1] 5067
ttexpanpreer[5067,]
## logFC AveExpr t P.Value adj.P.Val B
## HIST1H2AI -1.552631 -1.279491 -1.15892 0.2673656 0.9629482 -4.601018
match("CD247", rownames(ttexpanpreer))
## [1] 5620
ttexpanpreer[5620,]
## logFC AveExpr t P.Value adj.P.Val B
## CD247 -1.202195 5.162475 -1.087644 0.2965364 0.9629482 -4.637164
match("SLC4A1", rownames(ttexpanpreer))
## [1] 578
ttexpanpreer[578,]
## logFC AveExpr t P.Value adj.P.Val B
## SLC4A1 -1.877952 -2.596204 -2.359828 0.03461331 0.9629482 -3.829772
match("AL031733.2", rownames(ttexpanpreer))
## [1] 16319
ttexpanpreer[match("AL031733.2", rownames(ttexpanpreer)),]
## logFC AveExpr t P.Value adj.P.Val B
## AL031733.2 -0.1906197 -3.018037 -0.3001253 0.7688341 0.9629482 -4.901747
ttexpanpreer[match("STX8", rownames(ttexpanpreer)),]
## logFC AveExpr t P.Value adj.P.Val B
## STX8 0.6671938 5.151681 1.579404 0.1382944 0.9629482 -4.358354
topgene = match(rownames(ttexpanpreer)[1], rownames( expDatpre))
ttest = t.test (expDatpreer[topgene,]~designexpanpreer[,2])
ttest
##
## Welch Two Sample t-test
##
## data: expDatpreer[topgene, ] by designexpanpreer[, 2]
## t = -8.8635, df = 3.0186, p-value = 0.002955
## alternative hypothesis: true difference in means between group 1 and group 2 is not equal to 0
## 95 percent confidence interval:
## -7.579846 -3.584997
## sample estimates:
## mean in group 1 mean in group 2
## -2.875073 2.707349
save(list = "expanlimpreer", file = "expanlimpreer.Rdata")
This statistical analysis repeats the above ER+ E vs ER+ NE, but this analysis only uses the pre-treatment samples, whereas the last analysis used the delta2000. This means that results from this analysis may better help in predictive values for anti-PD1 response in ER+ BC.
From the t-test, it can be seen that the mean in group 1 (E) has an expression of -2.36, while that of group 2 (NE) is 2.71
library("DOSE")
library("clusterProfiler")
ttexpanpreernames = rownames(ttexpanpreer)
##
expanlimpreerstats = expanlimpreer$t[,2]
expanlimpreerstats %>% sort(., decreasing=TRUE) %>% head()
## CD163L1 AC025283.2 KRT17 RPP40 TACSTD2 DAPL1
## 9.456627 7.551229 5.953387 5.727983 5.583690 5.224851
expanlimpreerallEntrez = AnnotationDbi::select(org.Hs.eg.db, keys=names(expanlimpreerstats), keytype="SYMBOL", columns="ENTREZID")
names(expanlimpreerstats) = expanlimpreerallEntrez$ENTREZID[ match(names(expanlimpreerstats), expanlimpreerallEntrez$SYMBOL)]
expanlimpreerstats %>% sort(., decreasing=TRUE) %>% head()
## 283316 <NA> 3872 10799 4070 92196
## 9.456627 7.551229 5.953387 5.727983 5.583690 5.224851
expanlimpreerstats_sort = sort(expanlimpreerstats, decreasing=TRUE)
expanlimpreerrPAgsea = gseGO(expanlimpreerstats_sort,OrgDb=org.Hs.eg.db, nPerm=10000,
minGSSize=50, pvalueCutoff=0.05, maxGSSize=200, pAdjustMethod="BH", verbose=FALSE)
## Warning in .GSEA(geneList = geneList, exponent = exponent, minGSSize =
## minGSSize, : We do not recommend using nPerm parameter incurrent and future
## releases
## Warning in fgsea(pathways = geneSets, stats = geneList, nperm = nPerm, minSize
## = minGSSize, : You are trying to run fgseaSimple. It is recommended to use
## fgseaMultilevel. To run fgseaMultilevel, you need to remove the nperm argument
## in the fgsea function call.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (20.32% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize,
## gseaParam, : There are duplicate gene names, fgsea may produce unexpected
## results.
expanlimpreerrPAgsea[1:5,] %>% knitr::kable()
| ID | Description | setSize | enrichmentScore | NES | pvalue | p.adjust | qvalues | rank | leading_edge | core_enrichment | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| GO:2000027 | GO:2000027 | regulation of animal organ morphogenesis | 128 | 0.5486568 | 2.300298 | 0.0001673 | 0.0020584 | 0.0012012 | 3423 | tags=41%, list=14%, signal=36% | 4070/9863/7481/6092/6091/9620/3082/2246/57216/6591/144165/57664/7422/2719/4920/1601/9241/367/219736/7474/2252/1855/128272/59277/166336/2535/7476/2239/64750/6662/80320/23213/10252/652/6423/9500/1952/6422/655/7490/7472/79864/10818/8324/30812/8321/27031/26585/6495/57154/55727/22881/10082 |
| GO:0045216 | GO:0045216 | cell-cell junction organization | 198 | 0.5054890 | 2.244662 | 0.0001614 | 0.0020584 | 0.0012012 | 4866 | tags=41%, list=19%, signal=33% | 9076/1525/153562/1364/1366/1365/7481/9073/79778/1829/100506658/6714/1436/64065/6591/928/8650/7422/8443/50848/5584/1500/7042/143098/1009/1948/57662/999/80139/85302/5872/1495/5317/10207/7043/8777/7122/9253/55243/4641/286676/1832/23396/222256/7414/4771/6833/91862/421/57555/7368/7082/29841/2706/8502/654/79191/2697/3728/3553/5010/23136/8642/79629/5318/187/79987/9231/4867/1003/10979/389/5657/83700/1894/6722/402/24146/83660/1501/1000 |
| GO:0032963 | GO:0032963 | collagen metabolic process | 91 | 0.5598317 | 2.232985 | 0.0001709 | 0.0020584 | 0.0012012 | 4745 | tags=48%, list=19%, signal=39% | 55214/55323/4327/79148/4239/54206/4323/5159/1514/1289/7043/5045/11117/1471/90993/2191/9902/7349/57333/4320/1278/871/652/1277/1513/861/4314/8510/114757/8751/9697/4880/4318/55512/4312/51430/4313/5653/63894/5657/10893/23371/5467/5645 |
| GO:0060688 | GO:0060688 | regulation of morphogenesis of a branching structure | 50 | 0.6217732 | 2.223879 | 0.0001768 | 0.0020584 | 0.0012012 | 3588 | tags=52%, list=14%, signal=45% | 4070/3082/6591/7422/9241/9175/367/7474/2252/59277/2263/6662/4192/23213/652/9500/120892/6422/655/2099/7472/30812/26585/6495/55727/6474 |
| GO:0030199 | GO:0030199 | collagen fibril organization | 59 | 0.6011415 | 2.219806 | 0.0001756 | 0.0020584 | 0.0012012 | 4258 | tags=56%, list=17%, signal=47% | 2331/81792/7042/4015/1281/4921/1290/30008/1289/1805/11117/10491/60681/1303/165/4060/84171/4320/1278/871/6423/1277/4017/84695/7837/1311/4016/26585/1545/50509/63894/7092/7373 |
expanlimpreerrPAgsea[1:5,]
## ID Description
## GO:2000027 GO:2000027 regulation of animal organ morphogenesis
## GO:0045216 GO:0045216 cell-cell junction organization
## GO:0032963 GO:0032963 collagen metabolic process
## GO:0060688 GO:0060688 regulation of morphogenesis of a branching structure
## GO:0030199 GO:0030199 collagen fibril organization
## setSize enrichmentScore NES pvalue p.adjust
## GO:2000027 128 0.5486568 2.300298 0.0001673360 0.002058355
## GO:0045216 198 0.5054890 2.244662 0.0001613944 0.002058355
## GO:0032963 91 0.5598317 2.232985 0.0001708526 0.002058355
## GO:0060688 50 0.6217732 2.223879 0.0001768347 0.002058355
## GO:0030199 59 0.6011415 2.219806 0.0001755618 0.002058355
## qvalues rank leading_edge
## GO:2000027 0.001201235 3423 tags=41%, list=14%, signal=36%
## GO:0045216 0.001201235 4866 tags=41%, list=19%, signal=33%
## GO:0032963 0.001201235 4745 tags=48%, list=19%, signal=39%
## GO:0060688 0.001201235 3588 tags=52%, list=14%, signal=45%
## GO:0030199 0.001201235 4258 tags=56%, list=17%, signal=47%
## core_enrichment
## GO:2000027 4070/9863/7481/6092/6091/9620/3082/2246/57216/6591/144165/57664/7422/2719/4920/1601/9241/367/219736/7474/2252/1855/128272/59277/166336/2535/7476/2239/64750/6662/80320/23213/10252/652/6423/9500/1952/6422/655/7490/7472/79864/10818/8324/30812/8321/27031/26585/6495/57154/55727/22881/10082
## GO:0045216 9076/1525/153562/1364/1366/1365/7481/9073/79778/1829/100506658/6714/1436/64065/6591/928/8650/7422/8443/50848/5584/1500/7042/143098/1009/1948/57662/999/80139/85302/5872/1495/5317/10207/7043/8777/7122/9253/55243/4641/286676/1832/23396/222256/7414/4771/6833/91862/421/57555/7368/7082/29841/2706/8502/654/79191/2697/3728/3553/5010/23136/8642/79629/5318/187/79987/9231/4867/1003/10979/389/5657/83700/1894/6722/402/24146/83660/1501/1000
## GO:0032963 55214/55323/4327/79148/4239/54206/4323/5159/1514/1289/7043/5045/11117/1471/90993/2191/9902/7349/57333/4320/1278/871/652/1277/1513/861/4314/8510/114757/8751/9697/4880/4318/55512/4312/51430/4313/5653/63894/5657/10893/23371/5467/5645
## GO:0060688 4070/3082/6591/7422/9241/9175/367/7474/2252/59277/2263/6662/4192/23213/652/9500/120892/6422/655/2099/7472/30812/26585/6495/55727/6474
## GO:0030199 2331/81792/7042/4015/1281/4921/1290/30008/1289/1805/11117/10491/60681/1303/165/4060/84171/4320/1278/871/6423/1277/4017/84695/7837/1311/4016/26585/1545/50509/63894/7092/7373
expanlimpreerrPAgsea[1:5,11]
## [1] "4070/9863/7481/6092/6091/9620/3082/2246/57216/6591/144165/57664/7422/2719/4920/1601/9241/367/219736/7474/2252/1855/128272/59277/166336/2535/7476/2239/64750/6662/80320/23213/10252/652/6423/9500/1952/6422/655/7490/7472/79864/10818/8324/30812/8321/27031/26585/6495/57154/55727/22881/10082"
## [2] "9076/1525/153562/1364/1366/1365/7481/9073/79778/1829/100506658/6714/1436/64065/6591/928/8650/7422/8443/50848/5584/1500/7042/143098/1009/1948/57662/999/80139/85302/5872/1495/5317/10207/7043/8777/7122/9253/55243/4641/286676/1832/23396/222256/7414/4771/6833/91862/421/57555/7368/7082/29841/2706/8502/654/79191/2697/3728/3553/5010/23136/8642/79629/5318/187/79987/9231/4867/1003/10979/389/5657/83700/1894/6722/402/24146/83660/1501/1000"
## [3] "55214/55323/4327/79148/4239/54206/4323/5159/1514/1289/7043/5045/11117/1471/90993/2191/9902/7349/57333/4320/1278/871/652/1277/1513/861/4314/8510/114757/8751/9697/4880/4318/55512/4312/51430/4313/5653/63894/5657/10893/23371/5467/5645"
## [4] "4070/3082/6591/7422/9241/9175/367/7474/2252/59277/2263/6662/4192/23213/652/9500/120892/6422/655/2099/7472/30812/26585/6495/55727/6474"
## [5] "2331/81792/7042/4015/1281/4921/1290/30008/1289/1805/11117/10491/60681/1303/165/4060/84171/4320/1278/871/6423/1277/4017/84695/7837/1311/4016/26585/1545/50509/63894/7092/7373"
match(expanlimpreerrPAgsea[1:5,]$core_enrichment, expanlimpreerallEntrez$SYMBOL)
## [1] NA NA NA NA NA
This code chunk performs GSEA on the ttexpanpreer
object; an object describing the expression differences between ER+ E
tumours and ER+ NE tumours.
sDPpretnbc = sampleDataPatient[c(2:3,6:8,11:12,16,21,25:26),]
expDatpretnbc = expDatpre[,c(2:3,6:8,11:12,16,21,25:26)]
expanpretnbc = sDPpretnbc$expansion
expanpretnbc = as.matrix(expanpretnbc)
expanpretnbc = ifelse(sDPpretnbc$expansion=="E",1,2)
expanpretnbc = as.matrix(expanpretnbc)
rownames(expanpretnbc) = rownames(sDPpretnbc)
designexpanrownamespretnbc = sort(rownames(expanpretnbc))
designexpanpretnbc = model.matrix(~expanpretnbc)
rownames(designexpanpretnbc) = designexpanrownamespretnbc
colnames(designexpanpretnbc) = c("Intercept", "NEis2")
expanlimpretnbc = lmFit(expDatpretnbc, designexpanpretnbc)
expanlimpretnbc = eBayes(expanlimpretnbc)
ttexpanpretnbc = topTable(expanlimpretnbc, coef=2, adjust.method = "BH", n=nrow(expDatpretnbc))
ttexpanpretnbc[1:10,]
## logFC AveExpr t P.Value adj.P.Val B
## HIST1H2AI -5.497845 -1.3077502 -9.852600 5.699350e-07 0.01441252 2.5242235
## EBF3 4.838823 -2.3039121 7.272587 1.209106e-05 0.14816497 1.3428140
## LINC01484 -3.864636 -2.1709588 -6.993953 1.757731e-05 0.14816497 1.1711317
## SLC30A3 -3.803372 -1.8459799 -6.509260 3.450151e-05 0.21623945 0.8464402
## AL606807.1 -5.014666 -0.2066464 -6.359590 4.275535e-05 0.21623945 0.7390703
## EDA 3.613056 0.1180454 6.049604 6.731470e-05 0.22556775 0.5053754
## TSSK3 3.939649 -2.5363689 6.016979 7.066257e-05 0.22556775 0.4798655
## FOXJ1 4.967544 -2.2337008 6.010393 7.135962e-05 0.22556775 0.4746943
## AL138720.1 -4.569516 -1.8505591 -5.868717 8.826514e-05 0.24800542 0.3616880
## AC026369.3 -4.402078 -1.9266673 -5.590413 1.351205e-04 0.34169264 0.1296542
ttexpanpretnbc[1:10,]
## logFC AveExpr t P.Value adj.P.Val B
## HIST1H2AI -5.497845 -1.3077502 -9.852600 5.699350e-07 0.01441252 2.5242235
## EBF3 4.838823 -2.3039121 7.272587 1.209106e-05 0.14816497 1.3428140
## LINC01484 -3.864636 -2.1709588 -6.993953 1.757731e-05 0.14816497 1.1711317
## SLC30A3 -3.803372 -1.8459799 -6.509260 3.450151e-05 0.21623945 0.8464402
## AL606807.1 -5.014666 -0.2066464 -6.359590 4.275535e-05 0.21623945 0.7390703
## EDA 3.613056 0.1180454 6.049604 6.731470e-05 0.22556775 0.5053754
## TSSK3 3.939649 -2.5363689 6.016979 7.066257e-05 0.22556775 0.4798655
## FOXJ1 4.967544 -2.2337008 6.010393 7.135962e-05 0.22556775 0.4746943
## AL138720.1 -4.569516 -1.8505591 -5.868717 8.826514e-05 0.24800542 0.3616880
## AC026369.3 -4.402078 -1.9266673 -5.590413 1.351205e-04 0.34169264 0.1296542
##
ttexpanpretnbcsig=ttexpanpretnbc[ttexpanpretnbc$adj.P.Val<0.05,]
ttexpanpretnbcsig[1:10,]
## logFC AveExpr t P.Value adj.P.Val B
## HIST1H2AI -5.497845 -1.30775 -9.8526 5.69935e-07 0.01441252 2.524224
## NA NA NA NA NA NA NA
## NA.1 NA NA NA NA NA NA
## NA.2 NA NA NA NA NA NA
## NA.3 NA NA NA NA NA NA
## NA.4 NA NA NA NA NA NA
## NA.5 NA NA NA NA NA NA
## NA.6 NA NA NA NA NA NA
## NA.7 NA NA NA NA NA NA
## NA.8 NA NA NA NA NA NA
rownames(ttexpanpretnbcsig)
## [1] "HIST1H2AI"
match("SLC4A1", rownames(ttexpanpretnbc))
## [1] 83
ttexpanpretnbc[83,]
## logFC AveExpr t P.Value adj.P.Val B
## SLC4A1 4.578623 -2.445839 3.923828 0.00216389 0.4236858 -1.555428
match("CD163L1", rownames(ttexpanpretnbc))
## [1] 13900
ttexpanpretnbc[13900,]
## logFC AveExpr t P.Value adj.P.Val B
## CD163L1 -0.8979793 1.481341 -0.7720382 0.4555772 0.8287393 -5.153734
match("AL031733.2", rownames(ttexpanpretnbc))
## [1] 542
ttexpanpretnbc[542,]
## logFC AveExpr t P.Value adj.P.Val B
## AL031733.2 2.749687 -3.443441 2.747111 0.01822556 0.4236858 -3.012026
match("CREG1", rownames(ttexpanpretnbc))
## [1] 24360
ttexpanpretnbc[24360,]
## logFC AveExpr t P.Value adj.P.Val B
## CREG1 0.02392132 5.954767 0.05454233 0.9574338 0.9938808 -5.404491
match("CD247", rownames(ttexpanpretnbc))
## [1] 12558
ttexpanpretnbc[12558,]
## logFC AveExpr t P.Value adj.P.Val B
## CD247 -0.7494689 5.879397 -0.896188 0.388424 0.7821679 -5.069613
topgene = match(rownames(ttexpanpretnbc)[1], rownames( expDatpretnbc))
t.test (expDatpretnbc[topgene,]~designexpanpretnbc[,2])
##
## Welch Two Sample t-test
##
## data: expDatpretnbc[topgene, ] by designexpanpretnbc[, 2]
## t = 10.267, df = 8.8823, p-value = 3.18e-06
## alternative hypothesis: true difference in means between group 1 and group 2 is not equal to 0
## 95 percent confidence interval:
## 4.283988 6.711702
## sample estimates:
## mean in group 1 mean in group 2
## 1.691074 -3.806770
expDatpretnbc[18346,]
## T_BIOKEY_10_Pre T_BIOKEY_11_Pre T_BIOKEY_14_Pre T_BIOKEY_15_Pre T_BIOKEY_16_Pre
## 1.3528008 1.3422312 -4.3294198 0.9451816 1.9593753
## T_BIOKEY_19_Pre T_BIOKEY_2_Pre T_BIOKEY_26_Pre T_BIOKEY_31_Pre T_BIOKEY_8_Pre
## -4.8881896 -2.8683144 -4.1257380 2.8557819 -2.2277830
## T_BIOKEY_9_Pre
## -4.4011782
ttexpanpretnbc[1,]
## logFC AveExpr t P.Value adj.P.Val B
## HIST1H2AI -5.497845 -1.30775 -9.8526 5.69935e-07 0.01441252 2.524224
In this analysis I also used pre-treatment tumour samples, now comparing TNBC E vs TNBC NE. I was hoping that there may be some overlap in the results between this analysis and the previous analysis but this was not the case.
From this we can see that group 1 (E) had a higher mean expression than group 2 (NE). Because the logFC is -5, this means that the logFC function is showing the relative FC of group 2 compared to group 1, meaning that a negative logFC means that group 1 is higher than group 2.
ttexpanpretnbcnames = rownames(ttexpanpretnbc[1:200,])
##
expanlimpretnbcstats = expanlimpretnbc$t[,2]
expanlimpretnbcstats %>% sort(., decreasing=TRUE) %>% head()
## EBF3 EDA TSSK3 FOXJ1 GPR20 NXPH3
## 7.272587 6.049604 6.016979 6.010393 5.449755 5.251460
expanlimpretnbcallEntrez = AnnotationDbi::select(org.Hs.eg.db, keys=names(expanlimpretnbcstats), keytype="SYMBOL", columns="ENTREZID")
names(expanlimpretnbcstats) = expanlimpretnbcallEntrez$ENTREZID[ match(names(expanlimpretnbcstats), expanlimpretnbcallEntrez$SYMBOL)]
expanlimpretnbcstats %>% sort(., decreasing=TRUE) %>% head()
## 253738 1896 81629 2302 2843 11248
## 7.272587 6.049604 6.016979 6.010393 5.449755 5.251460
expanlimpretnbcstats_sort = sort(expanlimpretnbcstats, decreasing=TRUE)
expanlimpretnbcrPAgsea = gseGO(expanlimpretnbcstats_sort,OrgDb=org.Hs.eg.db, nPerm=10000,
minGSSize=50, pvalueCutoff=0.05, maxGSSize=200, pAdjustMethod="BH", verbose=FALSE)
## Warning in .GSEA(geneList = geneList, exponent = exponent, minGSSize =
## minGSSize, : We do not recommend using nPerm parameter incurrent and future
## releases
## Warning in fgsea(pathways = geneSets, stats = geneList, nperm = nPerm, minSize
## = minGSSize, : You are trying to run fgseaSimple. It is recommended to use
## fgseaMultilevel. To run fgseaMultilevel, you need to remove the nperm argument
## in the fgsea function call.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (16.41% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize,
## gseaParam, : There are duplicate gene names, fgsea may produce unexpected
## results.
expanlimpretnbcrPAgsea[1:5,] %>% knitr::kable()
| ID | Description | setSize | enrichmentScore | NES | pvalue | p.adjust | qvalues | rank | leading_edge | core_enrichment | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| GO:0050871 | GO:0050871 | positive regulation of B cell activation | 144 | -0.4979054 | -2.650106 | 0.0012547 | 0.0131793 | 0.0095665 | 4523 | tags=44%, list=18%, signal=36% | 28439/79155/8741/28391/28638/28426/28432/3493/28414/10451/102723170/28454/28395/10673/30009/7494/28444/28392/55795/4292/100/28461/28455/100423062/28394/3574/148022/57289/56941/952/939/2048/28434/51111/28473/28412/3503/7293/28452/3538/64127/3500/7292/28467/7037/3539/28396/28400/28442/28408/3501/84787/28429/28388/28449/3514/79915/3502/28385/3507/28386/28420/59067 |
| GO:0002228 | GO:0002228 | natural killer cell mediated immunity | 75 | -0.5479287 | -2.604583 | 0.0007042 | 0.0131793 | 0.0095665 | 7234 | tags=69%, list=29%, signal=50% | 1054/4068/5817/5777/387837/5272/56253/7409/634/3133/114836/6777/11126/2214/203068/23207/3952/10125/201294/146850/259197/3823/1130/57823/3824/3105/22914/55207/10225/6845/100507436/5052/2671/10666/9437/4818/127544/5873/3965/3135/3106/3822/3134/3821/3592/84868/116449/3805/8302/3902/3002/59067 |
| GO:0002715 | GO:0002715 | regulation of natural killer cell mediated immunity | 50 | -0.5915431 | -2.601694 | 0.0005429 | 0.0131793 | 0.0095665 | 7234 | tags=74%, list=29%, signal=53% | 4068/5817/387837/5272/56253/7409/634/3133/114836/6777/11126/3952/10125/146850/259197/3823/3824/3105/22914/10225/100507436/2671/10666/9437/3965/3135/3106/3822/3134/3821/3592/84868/116449/3805/8302/3902/59067 |
| GO:0099024 | GO:0099024 | plasma membrane invagination | 129 | -0.4980004 | -2.597652 | 0.0011403 | 0.0131793 | 0.0095665 | 4364 | tags=41%, list=17%, signal=34% | 28391/28638/28426/28432/3493/28414/102723170/28454/28395/6845/2209/28444/4481/28392/718/28461/8685/28455/100423062/51429/28394/94134/246/57289/1182/9212/54209/3684/28434/28473/28412/3503/28452/3538/54784/3500/28467/3539/28396/28400/28442/28408/3501/28429/28388/28449/3514/3502/28385/3507/28386/28420/84501 |
| GO:0042267 | GO:0042267 | natural killer cell mediated cytotoxicity | 72 | -0.5441619 | -2.580646 | 0.0006780 | 0.0131793 | 0.0095665 | 7371 | tags=69%, list=29%, signal=49% | 1054/4068/5817/5777/387837/5272/56253/7409/634/3133/114836/6777/11126/2214/203068/23207/3952/10125/201294/146850/259197/3823/1130/57823/3824/3105/22914/55207/6845/100507436/5052/2671/10666/9437/4818/127544/5873/3965/3135/3106/3822/3134/3821/3592/84868/3805/8302/3902/3002/59067 |
This code chunk performs GSEA on the ttexpanpretnbc
object; an object describing the expression differences between TNBC E
tumours and ER+ NE tumours.
# incorrect now: expDatoncan = expDatcan[,c(1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31,33,35,37,39,41,43,45,47,49,51,53,55,57,59,61)]
load(file = "Cancercells.R", verbose = T)
## Loading objects:
## delta2000can
## brcaDatcancer
## expDatcan
## sampleDataPatientcan
## deltagcan
## sampleData3can
expDatprecan = expDatcan[,c(2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32,34,36,38,40,42,43,45,47,49, 51, 53,55,57,59,61)]
delta2000can = delta2000can[,c(1:12,14:15,17,19:21,23:28,30:31)]
sampleDataPatientcan = sampleDataPatientcan[c(1:12,14:15,17,19:21,23:28,30:31),]
deltagcan = deltagcan[,c(1:12,14:15,17,19:21,23:28,30:31)]
expDatprecan = expDatprecan[,c(1:12,14:15,17,19:21,23:28,30:31)]
head(expDatprecan)
## BIOKEY_1_Pre BIOKEY_10_Pre BIOKEY_11_Pre BIOKEY_12_Pre BIOKEY_13_Pre
## A1BG 5.189745 5.778444 5.3804580 7.837913 5.6578037
## A1BG-AS1 -2.904325 3.237434 -0.8377502 1.824113 0.3086709
## A2M 5.112514 6.360532 3.1922818 2.491418 6.1229425
## A2M-AS1 -2.904325 1.603547 -2.3194404 -1.301041 0.3086709
## A4GALT 3.334154 2.854213 -2.3194404 -1.301041 2.3863003
## AAAS 5.598131 5.000118 4.9243512 5.646514 2.8567338
## BIOKEY_14_Pre BIOKEY_15_Pre BIOKEY_16_Pre BIOKEY_17_Pre BIOKEY_18_Pre
## A1BG 5.618258 4.889832 3.808299 7.519939 5.9619407
## A1BG-AS1 -1.287484 1.687723 -1.997482 2.272714 3.8049197
## A2M 5.407930 7.001283 5.865341 -1.718659 6.4816015
## A2M-AS1 -1.287484 -3.138754 1.200966 -1.718659 0.3088695
## A4GALT 2.724065 3.576517 4.871761 3.194614 0.3088695
## AAAS 4.331908 4.498659 4.086608 4.206636 0.3088695
## BIOKEY_19_Pre BIOKEY_2_Pre BIOKEY_21_Pre BIOKEY_22_Pre BIOKEY_24_Pre
## A1BG 4.223311 6.755717 6.951614 6.036091 6.8242042
## A1BG-AS1 1.283304 4.266240 2.999935 2.643389 -0.7336224
## A2M 6.840791 10.010324 5.643961 5.758353 5.4778787
## A2M-AS1 -1.943101 -2.260760 -2.301855 1.530182 -0.7336224
## A4GALT -1.943101 -2.260760 5.298380 -0.634512 -0.7336224
## AAAS 3.524348 3.781572 4.385602 1.702773 4.3562931
## BIOKEY_26_Pre BIOKEY_27_Pre BIOKEY_28_Pre BIOKEY_3_Pre BIOKEY_30_Pre
## A1BG 6.479746 2.819079 2.8176785 5.795977 6.626559
## A1BG-AS1 2.642840 1.469418 -0.3561496 4.733205 3.030535
## A2M 8.802318 5.313485 7.3923511 5.024763 2.428164
## A2M-AS1 1.529933 -1.587742 -0.3561496 1.851863 3.177939
## A4GALT 2.088355 4.029231 -0.3561496 1.134656 -1.905051
## AAAS 3.783555 5.010529 5.0527833 1.851863 4.996336
## BIOKEY_31_Pre BIOKEY_4_Pre BIOKEY_5_Pre BIOKEY_6_Pre BIOKEY_8_Pre
## A1BG 6.923018 5.059528 5.2189175 4.709453 1.062831
## A1BG-AS1 2.595002 2.912328 -0.3545796 -1.405573 4.126984
## A2M 6.664599 3.054358 3.2312428 5.302210 1.062831
## A2M-AS1 -3.357334 -2.008088 -0.3545796 -1.405573 1.062831
## A4GALT 5.923867 -2.008088 -0.3545796 -1.405573 2.781910
## AAAS 4.114240 4.086183 -0.3545796 4.564730 5.477825
## BIOKEY_9_Pre
## A1BG 5.041679
## A1BG-AS1 1.976117
## A2M 8.587659
## A2M-AS1 -3.568063
## A4GALT 4.871578
## AAAS 4.257223
sDPpreercan = sampleDataPatientcan[c(4,9:10,13:15,17,19:20,22:24),]
expDatpreercan = expDatprecan[,c(4,9:10,13:15,17,19:20,22:24)]
expanpreercan = sDPpreercan$expansion
expanpreercan = as.matrix(expanpreercan)
expanpreercan = ifelse(sDPpreercan$expansion=="E",1,2)
expanpreercan = as.matrix(expanpreercan)
rownames(expanpreercan) = rownames(sDPpreercan)
designexpanrownamespreercan = sort(rownames(expanpreercan))
designexpanpreercan = model.matrix(~expanpreercan)
rownames(designexpanpreercan) = designexpanrownamespreercan
colnames(designexpanpreercan) = c("Intercept", "NEis2")
expanlimpreercan = lmFit(expDatpreercan, designexpanpreercan)
expanlimpreercan = eBayes(expanlimpreercan)
ttexpanpreercan = topTable(expanlimpreercan, coef=2, adjust.method = "BH", n=nrow(expDatpreercan))
View(ttexpanpreercan[1:33,])
ttexpanpreercansig=ttexpanpreercan[ttexpanpreercan$adj.P.Val<0.05,]
ttexpanpreercansig[1:10,]
## logFC AveExpr t P.Value adj.P.Val B
## STX8 5.081200 4.262730 8.939391 6.878462e-07 0.01011336 5.495693
## TIMM22 5.152573 3.415512 8.819872 7.998544e-07 0.01011336 5.385673
## ARHGEF7 3.962487 3.294395 7.929888 2.583798e-06 0.01883735 4.505027
## MKRN2 3.585640 3.068773 7.673424 3.684436e-06 0.01883735 4.230009
## PARP3 4.200447 3.860108 7.654049 3.785764e-06 0.01883735 4.208828
## BORCS5 4.458877 2.895241 7.486545 4.795736e-06 0.01883735 4.023287
## DHRS12 4.383978 2.839067 7.328885 6.010422e-06 0.01883735 3.844644
## FBXO34 3.931405 2.499637 7.282181 6.430006e-06 0.01883735 3.790968
## DNAJC3-DT 3.369904 2.874785 7.253374 6.704212e-06 0.01883735 3.757687
## SRP14-AS1 3.896434 2.473408 7.067411 8.801096e-06 0.02225621 3.539626
rownames(ttexpanpreercansig)
## [1] "STX8" "TIMM22" "ARHGEF7" "MKRN2" "PARP3" "BORCS5"
## [7] "DHRS12" "FBXO34" "DNAJC3-DT" "SRP14-AS1" "URGCP" "ARL14EP"
## [13] "CD163L1" "TCF4" "PIGBOS1" "HUS1" "WASHC3" "MEPCE"
## [19] "SEPT10" "KIAA0753" "C15orf48" "NUDT15" "CARMIL1" "TRPM7"
## [25] "NAV1" "UBE2D4" "ESCO1" "FKBP9" "GALT" "UACA"
## [31] "IL13RA1" "IGLV5-45"
match("SLC4A1", rownames(ttexpanpreercan))
## [1] 942
ttexpanpreercan[942,]
## logFC AveExpr t P.Value adj.P.Val B
## SLC4A1 -2.053596 -0.9681969 -2.968602 0.01095174 0.1460704 -2.658614
match("CD163L1", rownames(ttexpanpreercan))
## [1] 13
ttexpanpreercan[13,]
## logFC AveExpr t P.Value adj.P.Val B
## CD163L1 4.545983 2.96057 6.731923 1.454488e-05 0.02718451 3.131889
match("HIST1H2AI", rownames(ttexpanpreercan))
## [1] 10855
ttexpanpreercan[10855,]
## logFC AveExpr t P.Value adj.P.Val B
## HIST1H2AI -2.128142 -0.3650483 -1.706752 0.1118023 0.2604566 -4.673713
topgene = match(rownames(ttexpanpreercan)[1], rownames( expDatpreercan))
t.test (expDatpreercan[topgene,]~designexpanpreercan[,2])
##
## Welch Two Sample t-test
##
## data: expDatpreercan[topgene, ] by designexpanpreercan[, 2]
## t = -8.8761, df = 3.0804, p-value = 0.002716
## alternative hypothesis: true difference in means between group 1 and group 2 is not equal to 0
## 95 percent confidence interval:
## -6.876416 -3.285984
## sample estimates:
## mean in group 1 mean in group 2
## 0.4518303 5.5330304
topgene
## [1] 13732
expDatpreercan[topgene,]
## BIOKEY_12_Pre BIOKEY_17_Pre BIOKEY_18_Pre BIOKEY_21_Pre BIOKEY_22_Pre
## 1.4012010 6.6023405 0.3088695 4.8386326 4.4560160
## BIOKEY_24_Pre BIOKEY_27_Pre BIOKEY_3_Pre BIOKEY_30_Pre BIOKEY_4_Pre
## 5.3912070 6.6896995 4.8779145 5.8536729 5.7091740
## BIOKEY_5_Pre BIOKEY_6_Pre
## -0.3545796 5.3786163
expDatpreercan["STX8",]
## BIOKEY_12_Pre BIOKEY_17_Pre BIOKEY_18_Pre BIOKEY_21_Pre BIOKEY_22_Pre
## 1.4012010 6.6023405 0.3088695 4.8386326 4.4560160
## BIOKEY_24_Pre BIOKEY_27_Pre BIOKEY_3_Pre BIOKEY_30_Pre BIOKEY_4_Pre
## 5.3912070 6.6896995 4.8779145 5.8536729 5.7091740
## BIOKEY_5_Pre BIOKEY_6_Pre
## -0.3545796 5.3786163
ttexpanpreercan[1,]
## logFC AveExpr t P.Value adj.P.Val B
## STX8 5.0812 4.26273 8.939391 6.878462e-07 0.01011336 5.495693
designexpanpreercan
## Intercept NEis2
## BIOKEY_12_Pre 1 1
## BIOKEY_17_Pre 1 2
## BIOKEY_18_Pre 1 1
## BIOKEY_21_Pre 1 2
## BIOKEY_22_Pre 1 2
## BIOKEY_24_Pre 1 2
## BIOKEY_27_Pre 1 2
## BIOKEY_3_Pre 1 2
## BIOKEY_30_Pre 1 2
## BIOKEY_4_Pre 1 2
## BIOKEY_5_Pre 1 1
## BIOKEY_6_Pre 1 2
## attr(,"assign")
## [1] 0 1
From here, group 1 (E) gas a lower expression than group 2 (NE).
ttexpanpreernamescan = rownames(ttexpanpreercan[1:200,])
##
expanlimpreercanstats = expanlimpreercan$t[,2]
expanlimpreercanstats %>% sort(., decreasing=TRUE) %>% head()
## STX8 TIMM22 ARHGEF7 MKRN2 PARP3 BORCS5
## 8.939391 8.819872 7.929888 7.673424 7.654049 7.486545
expanlimpreercanallEntrez = AnnotationDbi::select(org.Hs.eg.db, keys=names(expanlimpreercanstats), keytype="SYMBOL", columns="ENTREZID")
names(expanlimpreercanstats) = expanlimpreercanallEntrez$ENTREZID[ match(names(expanlimpreercanstats), expanlimpreercanallEntrez$SYMBOL)]
expanlimpreercanstats %>% sort(., decreasing=TRUE) %>% head()
## 9482 29928 8874 23609 10039 118426
## 8.939391 8.819872 7.929888 7.673424 7.654049 7.486545
expanlimpreercanstats_sort = sort(expanlimpreercanstats, decreasing=TRUE)
expanlimpreercanrPAgsea = gseGO(expanlimpreercanstats_sort,OrgDb=org.Hs.eg.db, nPerm=10000,
minGSSize=50, pvalueCutoff=0.05, maxGSSize=200, pAdjustMethod="BH", verbose=FALSE)
## Warning in .GSEA(geneList = geneList, exponent = exponent, minGSSize =
## minGSSize, : We do not recommend using nPerm parameter incurrent and future
## releases
## Warning in fgsea(pathways = geneSets, stats = geneList, nperm = nPerm, minSize
## = minGSSize, : You are trying to run fgseaSimple. It is recommended to use
## fgseaMultilevel. To run fgseaMultilevel, you need to remove the nperm argument
## in the fgsea function call.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (33.75% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize,
## gseaParam, : There are duplicate gene names, fgsea may produce unexpected
## results.
expanlimpreercanrPAgsea[1:5,] %>% knitr::kable()
| ID | Description | setSize | enrichmentScore | NES | pvalue | p.adjust | qvalues | rank | leading_edge | core_enrichment | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| GO:0002181 | GO:0002181 | cytoplasmic translation | 142 | 0.6319338 | 3.545761 | 0.0011050 | 0.0058476 | 0.0029331 | 5444 | tags=68%, list=22%, signal=54% | 56339/8891/10605/10492/8664/4733/6191/6173/6154/51611/7458/2332/6194/6124/170506/6139/51389/6168/6170/8661/6207/6160/8668/6134/92715/200916/6176/1983/1968/6138/6235/6202/6147/6122/80315/6209/4736/9147/5936/6208/8894/6228/6229/1939/10480/6232/7311/6223/6142/6159/6167/6234/3184/6164/55854/6146/6233/1973/6152/6136/6175/6161/3921/11224/120526/26986/6143/6205/6189/6222/132864/6188/6171/23521/6217/6125/6137/6210/6141/1660/6193/6130/6203/6165/6135/89978/6133/6144/9045/1801/8531/6132/6128/6206/6230/1974/25873 |
| GO:0035966 | GO:0035966 | response to topologically incorrect protein | 155 | 0.6135017 | 3.473464 | 0.0012469 | 0.0058476 | 0.0029331 | 4578 | tags=50%, list=18%, signal=41% | 101928527/51009/91445/871/7057/118424/5611/54788/5771/3338/5295/1388/23197/4287/57414/5610/3304/55432/65992/666/10956/22926/51528/55768/64764/1191/10551/9531/7009/387923/4780/3337/55161/2161/3303/23071/55741/10808/3309/7353/2033/23376/9709/55829/148327/10018/1965/7494/90993/27248/153222/9451/23645/9217/10347/1649/79956/11080/80279/10133/55757/3297/92305/51283/4189/1861/7873/27230/3320/3315/54870/10488/84236/89890/26608/55658/79139/84919 |
| GO:0006986 | GO:0006986 | response to unfolded protein | 134 | 0.6052674 | 3.377031 | 0.0010352 | 0.0058476 | 0.0029331 | 4578 | tags=51%, list=18%, signal=42% | 101928527/51009/871/7057/118424/5611/5771/3338/5295/1388/23197/57414/5610/3304/55432/65992/666/10956/22926/51528/64764/10551/9531/7009/387923/4780/3337/55161/3303/23071/55741/10808/3309/2033/23376/9709/55829/148327/10018/1965/7494/90993/27248/153222/9451/23645/9217/10347/1649/79956/11080/80279/10133/3297/92305/51283/4189/7873/27230/3320/3315/54870/10488/84236/89890/26608/79139/84919 |
| GO:0035967 | GO:0035967 | cellular response to topologically incorrect protein | 113 | 0.6168627 | 3.319020 | 0.0009124 | 0.0058476 | 0.0029331 | 4578 | tags=51%, list=18%, signal=42% | 101928527/51009/91445/54788/5771/5295/1388/4287/57414/5610/3304/55432/65992/666/10956/22926/55768/64764/10551/9531/7009/387923/4780/55161/3303/3309/7353/2033/23376/9709/55829/148327/10018/1965/7494/90993/27248/9451/23645/9217/10347/1649/79956/80279/10133/55757/3297/51283/4189/1861/27230/54870/10488/84236/26608/55658/79139/84919 |
| GO:0030968 | GO:0030968 | endoplasmic reticulum unfolded protein response | 73 | 0.6453638 | 3.243673 | 0.0006337 | 0.0058476 | 0.0029331 | 5085 | tags=63%, list=20%, signal=50% | 101928527/51009/5771/5295/1388/57414/5610/55432/65992/666/10956/22926/64764/10551/387923/4780/55161/3309/2033/23376/9709/55829/148327/10018/1965/7494/90993/27248/9451/23645/9217/10347/1649/79956/80279/51283/4189/27230/54870/10488/26608/79139/84919/10399/30001/595 |
sDPpretnbccan = sampleDataPatientcan[c(2:3,6:8,11:12,16,21,25:26),]
expDatpretnbccan = expDatprecan[,c(2:3,6:8,11:12,16,21,25:26)]
expanpretnbccan = sDPpretnbccan$expansion
expanpretnbccan = as.matrix(expanpretnbccan)
expanpretnbccan = ifelse(sDPpretnbccan$expansion=="E",1,2)
expanpretnbccan = as.matrix(expanpretnbccan)
rownames(expanpretnbccan) = rownames(sDPpretnbccan)
designexpanrownamespretnbccan = sort(rownames(expanpretnbccan))
designexpanpretnbccan = model.matrix(~expanpretnbccan)
rownames(designexpanpretnbccan) = designexpanrownamespretnbccan
colnames(designexpanpretnbccan) = c("Intercept", "NEis2")
expanlimpretnbccan = lmFit(expDatpretnbccan, designexpanpretnbccan)
expanlimpretnbccan = eBayes(expanlimpretnbccan)
ttexpanpretnbccan = topTable(expanlimpretnbccan, coef=2, adjust.method = "BH", n=nrow(expDatpretnbccan))
ttexpanpretnbccan[1:10,]
## logFC AveExpr t P.Value adj.P.Val B
## LINC01133 -5.770864 1.32930502 -5.906893 3.427880e-05 0.5206684 -4.538141
## RNF128 5.677530 0.15914761 5.397415 8.577419e-05 0.5206684 -4.541577
## SPIRE2 -4.440320 0.05592086 -5.054427 1.621878e-04 0.5206684 -4.544215
## CXCL13 -6.171653 2.10934256 -4.940096 2.012310e-04 0.5206684 -4.545158
## NXPH3 4.004697 -1.04199544 4.750414 2.888288e-04 0.5206684 -4.546798
## AP001453.2 -4.198955 2.58323242 -4.604544 3.824539e-04 0.5206684 -4.548124
## PHYHD1 5.080545 1.88104398 4.565342 4.125962e-04 0.5206684 -4.548490
## CCNE2 -3.594409 2.46574215 -4.485047 4.822055e-04 0.5206684 -4.549254
## CES3 -4.378754 0.02793643 -4.471602 4.949919e-04 0.5206684 -4.549384
## SLC4A1 4.826934 -0.59350249 4.390418 5.799620e-04 0.5206684 -4.550179
ttexpanpretnbccan[1:10,]
## logFC AveExpr t P.Value adj.P.Val B
## LINC01133 -5.770864 1.32930502 -5.906893 3.427880e-05 0.5206684 -4.538141
## RNF128 5.677530 0.15914761 5.397415 8.577419e-05 0.5206684 -4.541577
## SPIRE2 -4.440320 0.05592086 -5.054427 1.621878e-04 0.5206684 -4.544215
## CXCL13 -6.171653 2.10934256 -4.940096 2.012310e-04 0.5206684 -4.545158
## NXPH3 4.004697 -1.04199544 4.750414 2.888288e-04 0.5206684 -4.546798
## AP001453.2 -4.198955 2.58323242 -4.604544 3.824539e-04 0.5206684 -4.548124
## PHYHD1 5.080545 1.88104398 4.565342 4.125962e-04 0.5206684 -4.548490
## CCNE2 -3.594409 2.46574215 -4.485047 4.822055e-04 0.5206684 -4.549254
## CES3 -4.378754 0.02793643 -4.471602 4.949919e-04 0.5206684 -4.549384
## SLC4A1 4.826934 -0.59350249 4.390418 5.799620e-04 0.5206684 -4.550179
ttexpanpretnbccansig=ttexpanpretnbccan[ttexpanpretnbccan$adj.P.Val<0.05,]
ttexpanpretnbccansig[1:10,]
## logFC AveExpr t P.Value adj.P.Val B
## NA NA NA NA NA NA NA
## NA.1 NA NA NA NA NA NA
## NA.2 NA NA NA NA NA NA
## NA.3 NA NA NA NA NA NA
## NA.4 NA NA NA NA NA NA
## NA.5 NA NA NA NA NA NA
## NA.6 NA NA NA NA NA NA
## NA.7 NA NA NA NA NA NA
## NA.8 NA NA NA NA NA NA
## NA.9 NA NA NA NA NA NA
rownames(ttexpanpretnbccansig)
## character(0)
match("PTN", rownames(ttexpanpretnbccan))
## [1] 26
ttexpanpretnbccan[26,]
## logFC AveExpr t P.Value adj.P.Val B
## PTN 4.856942 3.438396 4.005355 0.001239745 0.5206684 -4.554211
match("SLC4A1", rownames(ttexpanpretnbccan))
## [1] 10
ttexpanpretnbccan[10,]
## logFC AveExpr t P.Value adj.P.Val B
## SLC4A1 4.826934 -0.5935025 4.390418 0.000579962 0.5206684 -4.550179
match("CD163L1", rownames(ttexpanpretnbccan))
## [1] 17601
ttexpanpretnbccan[17601,]
## logFC AveExpr t P.Value adj.P.Val B
## CD163L1 -0.6759196 2.715055 -0.5248446 0.6076904 0.8730681 -4.599233
match("HIST1H2AI", rownames(ttexpanpretnbccan))
## [1] 97
ttexpanpretnbccan[97,]
## logFC AveExpr t P.Value adj.P.Val B
## HIST1H2AI -3.577364 -0.3363316 -3.288589 0.00522041 0.5206684 -4.562914
topgene = match(rownames(ttexpanpretnbccan)[1], rownames( expDatpretnbccan))
t.test (expDatpretnbccan[topgene,]~designexpanpretnbccan[,2])
##
## Welch Two Sample t-test
##
## data: expDatpretnbccan[topgene, ] by designexpanpretnbccan[, 2]
## t = 5.7723, df = 8.8007, p-value = 0.0002924
## alternative hypothesis: true difference in means between group 1 and group 2 is not equal to 0
## 95 percent confidence interval:
## 3.501445 8.040282
## sample estimates:
## mean in group 1 mean in group 2
## 4.477049 -1.293815
topgene
## [1] 8041
expDatpretnbccan[topgene,]
## BIOKEY_10_Pre BIOKEY_11_Pre BIOKEY_14_Pre BIOKEY_15_Pre BIOKEY_16_Pre
## 6.5472501 2.8044262 -1.2874836 5.2619773 2.8906423
## BIOKEY_19_Pre BIOKEY_2_Pre BIOKEY_26_Pre BIOKEY_31_Pre BIOKEY_8_Pre
## -1.9431012 -2.2607596 0.2336873 4.8809486 1.0628306
## BIOKEY_9_Pre
## -3.5680628
ttexpanpretnbccan[1,]
## logFC AveExpr t P.Value adj.P.Val B
## LINC01133 -5.770864 1.329305 -5.906893 3.42788e-05 0.5206684 -4.538141
designexpanpretnbccan
## Intercept NEis2
## BIOKEY_10_Pre 1 1
## BIOKEY_11_Pre 1 1
## BIOKEY_14_Pre 1 2
## BIOKEY_15_Pre 1 1
## BIOKEY_16_Pre 1 1
## BIOKEY_19_Pre 1 2
## BIOKEY_2_Pre 1 2
## BIOKEY_26_Pre 1 2
## BIOKEY_31_Pre 1 1
## BIOKEY_8_Pre 1 2
## BIOKEY_9_Pre 1 2
## attr(,"assign")
## [1] 0 1
negative logFC: group 1 (E) is higher than group 2 (NE)
ttexpanpretnbcnamescan = rownames(ttexpanpretnbccan[1:200,])
##
expanlimpretnbccanstats = expanlimpretnbccan$t[,2]
expanlimpretnbccanstats %>% sort(., decreasing=TRUE) %>% head()
## RNF128 NXPH3 PHYHD1 SLC4A1 CHAD IL17RB
## 5.397415 4.750414 4.565342 4.390418 4.380305 4.324675
expanlimpretnbccanallEntrez = AnnotationDbi::select(org.Hs.eg.db, keys=names(expanlimpretnbccanstats), keytype="SYMBOL", columns="ENTREZID")
names(expanlimpretnbccanstats) = expanlimpretnbccanallEntrez$ENTREZID[ match(names(expanlimpretnbccanstats), expanlimpretnbccanallEntrez$SYMBOL)]
expanlimpretnbccanstats %>% sort(., decreasing=TRUE) %>% head()
## 79589 11248 254295 6521 1101 55540
## 5.397415 4.750414 4.565342 4.390418 4.380305 4.324675
expanlimpretnbccanstats_sort = sort(expanlimpretnbccanstats, decreasing=TRUE)
expanlimpretnbccanrPAgsea = gseGO(expanlimpretnbccanstats_sort,OrgDb=org.Hs.eg.db, nPerm=10000,
minGSSize=50, pvalueCutoff=0.05, maxGSSize=200, pAdjustMethod="BH", verbose=FALSE)
## Warning in .GSEA(geneList = geneList, exponent = exponent, minGSSize =
## minGSSize, : We do not recommend using nPerm parameter incurrent and future
## releases
## Warning in fgsea(pathways = geneSets, stats = geneList, nperm = nPerm, minSize
## = minGSSize, : You are trying to run fgseaSimple. It is recommended to use
## fgseaMultilevel. To run fgseaMultilevel, you need to remove the nperm argument
## in the fgsea function call.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (23.67% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize,
## gseaParam, : There are duplicate gene names, fgsea may produce unexpected
## results.
expanlimpretnbccanrPAgsea[1:5,] %>% knitr::kable()
| ID | Description | setSize | enrichmentScore | NES | pvalue | p.adjust | qvalues | rank | leading_edge | core_enrichment | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| GO:0006119 | GO:0006119 | oxidative phosphorylation | 117 | -0.5622125 | -3.185613 | 0.0010030 | 0.0091503 | 0.0054669 | 5575 | tags=58%, list=22%, signal=46% | 4728/6391/513/10105/4715/4702/27089/539/51085/10632/7384/51142/1327/55967/4701/7388/1339/4708/522/374291/9167/29796/4710/4714/79736/4694/1329/4726/10476/9551/4711/2395/637/4704/509/57017/7386/84300/2631/201626/4697/170712/7385/4716/4706/1716/400916/4724/26471/1738/983/4713/4709/891/790955/440567/92014/4718/4436/4722/205327/4700/54949/79085/215/6392/84275/246269 |
| GO:0022904 | GO:0022904 | respiratory electron transport chain | 96 | -0.5674612 | -3.102089 | 0.0008354 | 0.0091503 | 0.0054669 | 5152 | tags=55%, list=20%, signal=44% | 4715/4702/27089/83733/51142/1327/55967/4701/7388/144363/1339/4708/374291/9167/6648/29796/4710/11232/2819/4714/4694/1329/4726/4711/637/4704/57017/7386/2110/4697/4716/4706/79751/1716/4724/6390/1738/983/4713/4709/891/790955/440567/92014/4718/4722/2629/8604/4700/54949/6392/2109/246269 |
| GO:0048002 | GO:0048002 | antigen processing and presentation of peptide antigen | 58 | -0.6285682 | -3.100335 | 0.0005731 | 0.0091503 | 0.0054669 | 4317 | tags=59%, list=17%, signal=49% | 1785/811/3115/3105/50856/3112/3109/3133/6892/51752/563/1636/3113/3123/6556/3416/972/29108/54842/567/3122/3117/6890/6891/3111/3108/3106/3127/3107/23457/3119/3134/2207/3135 |
| GO:0033108 | GO:0033108 | mitochondrial respiratory chain complex assembly | 87 | -0.5700827 | -3.057271 | 0.0007831 | 0.0091503 | 0.0054669 | 6366 | tags=61%, list=25%, signal=46% | 54902/57001/6834/25813/29078/4728/84987/29090/4715/4702/51241/100303755/55967/6341/4708/493753/374291/28976/131474/90871/4710/100131801/4714/116228/4694/55471/4711/4704/7386/54539/51204/84300/126328/4716/285521/4706/4724/4713/4709/790955/51287/4718/4722/284184/137682/84233/4700/54949/51079/9131/84275/55863/79072 |
| GO:0010257 | GO:0010257 | NADH dehydrogenase complex assembly | 52 | -0.6361512 | -3.048276 | 0.0005559 | 0.0091503 | 0.0054669 | 5575 | tags=58%, list=22%, signal=45% | 4728/29090/4715/4702/55967/4708/374291/28976/90871/4710/4714/4694/55471/4711/4704/54539/126328/4716/4706/4724/4713/4709/4718/4722/284184/137682/84233/4700/51079/55863 |
setwd("/Volumes/archive/cancergeneticslab/MikBlack/JudyJewell/brca-scrnaseq")
load(file = "Tcells.R", verbose = T)
## Loading objects:
## delta2000tcell
## brcaDattcell
## expDattcell
## sampleDataPatienttcell
## sampleData3tcell
## expDattcellpre
## expDattcellon
colnames(delta2000tcell)
## [1] "T_BIOKEY1delta" "T_BIOKEY10delta" "E_BIOKEY11delta" "E_BIOKEY12delta"
## [5] "E_BIOKEY13delta" "E_BIOKEY614delta" "E_BIOKEY15delta" "T_BIOKEY16delta"
## [9] "T_BIOKEY17delta" "T_BIOKEY18delta" "T_BIOKEY19delta" "E_BIOKEY2delta"
## [13] "H_BIOKEY20delta" "T_BIOKEY21delta" "T_BIOKEY22delta" "T_BIOKEY23delta"
## [17] "E_BIOKEY24delta" "E_BIOKEY25delta" "T_BIOKEY26delta" "E_BIOKEY27delta"
## [21] "E_BIOKEY28delta" "E_BIOKEY29delta" "H_BIOKEY3delta" "E_BIOKEY30delta"
## [25] "T_BIOKEY31delta" "T_BIOKEY4delta" "E_BIOKEY5delta" "H_BIOKEY6delta"
## [29] "E_BIOKEY7delta" "E_BIOKEY8delta" "T_BIOKEY9delta"
dim(expDattcell)
## [1] 25288 62
rownames(sampleData3tcell)
## [1] "BIOKEY_13_Pre" "BIOKEY_10_Pre" "BIOKEY_16_Pre" "BIOKEY_14_Pre"
## [5] "BIOKEY_19_Pre" "BIOKEY_23_Pre" "BIOKEY_26_Pre" "BIOKEY_28_Pre"
## [9] "BIOKEY_3_Pre" "BIOKEY_15_Pre" "BIOKEY_8_Pre" "BIOKEY_5_Pre"
## [13] "BIOKEY_30_Pre" "BIOKEY_12_Pre" "BIOKEY_1_Pre" "BIOKEY_31_Pre"
## [17] "BIOKEY_20_Pre" "BIOKEY_22_Pre" "BIOKEY_25_Pre" "BIOKEY_21_Pre"
## [21] "BIOKEY_29_Pre" "BIOKEY_4_Pre" "BIOKEY_9_Pre" "BIOKEY_18_Pre"
## [25] "BIOKEY_11_Pre" "BIOKEY_7_Pre" "BIOKEY_2_Pre" "BIOKEY_6_Pre"
## [29] "BIOKEY_17_Pre" "BIOKEY_27_Pre" "BIOKEY_24_Pre"
colnames(expDattcellon)
## [1] "BIOKEY_1_On" "BIOKEY_10_On" "BIOKEY_12_On" "BIOKEY_13_On" "BIOKEY_14_On"
## [6] "BIOKEY_15_On" "BIOKEY_16_On" "BIOKEY_17_On" "BIOKEY_18_On" "BIOKEY_19_On"
## [11] "BIOKEY_2_On" "BIOKEY_20_On" "BIOKEY_21_On" "BIOKEY_22_On" "BIOKEY_23_On"
## [16] "BIOKEY_24_On" "BIOKEY_25_On" "BIOKEY_26_On" "BIOKEY_27_On" "BIOKEY_28_On"
## [21] "BIOKEY_29_On" "BIOKEY_3_On" "BIOKEY_30_On" "BIOKEY_31_On" "BIOKEY_4_On"
## [26] "BIOKEY_5_On" "BIOKEY_6_On" "BIOKEY_7_On" "BIOKEY_8_On" "BIOKEY_9_On"
delta2000tcell = delta2000tcell[,-c(13,16,18,22,29)]
sampleDataPatienttcell = sampleDataPatienttcell[-c(13,16,18,22,29),]
expDattcell = expDattcell[,-c(25:26,31:32,35:36,43:44,57:58)]
expDattcellpre = expDattcellpre[,-c(13,16,18,22,29)]
sampleData3tcell = sampleData3tcell[-c(6,17,19,21,26),]
sDPpreert = sampleDataPatienttcell[c(4,9:10,13:15,17,19:20,22:24),]
expDatpreert = expDattcellpre[,c(4,9:10,13:15,17,19:20,22:24)]
expanpreert = sDPpreert$expansion
expanpreert = as.matrix(expanpreert)
expanpreert = ifelse(sDPpreert$expansion=="E",1,2)
expanpreert = as.matrix(expanpreert)
rownames(expanpreert) = rownames(sDPpreert)
designexpanrownamespreert = sort(rownames(expanpreert))
designexpanpreert = model.matrix(~expanpreert)
rownames(designexpanpreert) = designexpanrownamespreert
colnames(designexpanpreert) = c("Intercept", "NEis2")
expanlimpreert = lmFit(expDatpreert, designexpanpreert)
expanlimpreert = eBayes(expanlimpreert)
ttexpanpreert = topTable(expanlimpreert, coef=2, adjust.method = "BH", n=nrow(expDatpreert))
ttexpanpreert[1:10,]
## logFC AveExpr t P.Value adj.P.Val B
## CCL3L1 -4.194562 2.959868 -3.865538 0.001361138 0.6776934 -4.553874
## IGHV3-48 -4.284575 1.226946 -3.662571 0.002090607 0.6776934 -4.556498
## ADAMTS10 2.899390 2.054158 3.585189 0.002462582 0.6776934 -4.557531
## SMPD3 3.672813 1.710439 3.535756 0.002734150 0.6776934 -4.558200
## ZYG11B 3.136617 3.031014 3.526968 0.002785462 0.6776934 -4.558319
## TYW1B 2.603277 2.530765 3.517660 0.002840869 0.6776934 -4.558446
## CD9 2.674077 4.797037 3.511949 0.002875405 0.6776934 -4.558524
## FAF1 -2.961785 2.784412 -3.378501 0.003812843 0.6776934 -4.560373
## ETV1 -3.142633 1.692693 -3.356584 0.003993530 0.6776934 -4.560682
## N4BP3 2.777218 3.124552 3.349226 0.004056079 0.6776934 -4.560786
ttexpanpreert[1:10,]
## logFC AveExpr t P.Value adj.P.Val B
## CCL3L1 -4.194562 2.959868 -3.865538 0.001361138 0.6776934 -4.553874
## IGHV3-48 -4.284575 1.226946 -3.662571 0.002090607 0.6776934 -4.556498
## ADAMTS10 2.899390 2.054158 3.585189 0.002462582 0.6776934 -4.557531
## SMPD3 3.672813 1.710439 3.535756 0.002734150 0.6776934 -4.558200
## ZYG11B 3.136617 3.031014 3.526968 0.002785462 0.6776934 -4.558319
## TYW1B 2.603277 2.530765 3.517660 0.002840869 0.6776934 -4.558446
## CD9 2.674077 4.797037 3.511949 0.002875405 0.6776934 -4.558524
## FAF1 -2.961785 2.784412 -3.378501 0.003812843 0.6776934 -4.560373
## ETV1 -3.142633 1.692693 -3.356584 0.003993530 0.6776934 -4.560682
## N4BP3 2.777218 3.124552 3.349226 0.004056079 0.6776934 -4.560786
ttexpanpreertsig=ttexpanpreert[ttexpanpreert$adj.P.Val<0.05,]
ttexpanpreertsig[1:10,]
## logFC AveExpr t P.Value adj.P.Val B
## NA NA NA NA NA NA NA
## NA.1 NA NA NA NA NA NA
## NA.2 NA NA NA NA NA NA
## NA.3 NA NA NA NA NA NA
## NA.4 NA NA NA NA NA NA
## NA.5 NA NA NA NA NA NA
## NA.6 NA NA NA NA NA NA
## NA.7 NA NA NA NA NA NA
## NA.8 NA NA NA NA NA NA
## NA.9 NA NA NA NA NA NA
rownames(ttexpanpreertsig)
## character(0)
match("IQSEC2", rownames(ttexpanpreert))
## [1] 3254
ttexpanpreert[3254,]
## logFC AveExpr t P.Value adj.P.Val B
## IQSEC2 1.252203 -0.105018 1.291257 0.2148901 0.6776934 -4.591703
match("PDCD1", rownames(ttexpanpreert))
## [1] 1361
ttexpanpreert[1361,]
## logFC AveExpr t P.Value adj.P.Val B
## PDCD1 -1.859253 5.753456 -1.614609 0.1258668 0.6776934 -4.587265
match("CD163L1", rownames(ttexpanpreert))
## [1] 374
ttexpanpreert[374,]
## logFC AveExpr t P.Value adj.P.Val B
## CD163L1 1.894626 0.3767991 2.073418 0.05457495 0.6776934 -4.580301
match("LINC01133", rownames(ttexpanpreert))
## [1] 7469
ttexpanpreert[7469,]
## logFC AveExpr t P.Value adj.P.Val B
## LINC01133 0.9811407 -0.3083151 0.8518209 0.4068358 0.6776934 -4.596647
match("HIST1H2AI", rownames(ttexpanpreert))
## [1] 2503
ttexpanpreert[2503,]
## logFC AveExpr t P.Value adj.P.Val B
## HIST1H2AI 1.313073 -0.0593656 1.388782 0.1838579 0.6776934 -4.590422
topgene = match(rownames(ttexpanpreert)[1], rownames( expDattcellpre))
t.test (expDatpreert[topgene,]~designexpanpreert[,2])
##
## Welch Two Sample t-test
##
## data: expDatpreert[topgene, ] by designexpanpreert[, 2]
## t = 5.4166, df = 9.2394, p-value = 0.0003871
## alternative hypothesis: true difference in means between group 1 and group 2 is not equal to 0
## 95 percent confidence interval:
## 2.449665 5.939460
## sample estimates:
## mean in group 1 mean in group 2
## 6.105790 1.911228
topgene
## [1] 3093
expDatpreert[topgene,]
## BIOKEY_12_Pre BIOKEY_17_Pre BIOKEY_18_Pre BIOKEY_21_Pre BIOKEY_22_Pre
## 6.4918874 0.3204857 5.2385971 0.9197805 1.3308503
## BIOKEY_24_Pre BIOKEY_27_Pre BIOKEY_3_Pre BIOKEY_30_Pre BIOKEY_4_Pre
## 4.3355261 4.8155313 -0.1254166 2.3972571 3.4542531
## BIOKEY_5_Pre BIOKEY_6_Pre
## 6.5868855 -0.2472196
ttexpanpreert[1,]
## logFC AveExpr t P.Value adj.P.Val B
## CCL3L1 -4.194562 2.959868 -3.865538 0.001361138 0.6776934 -4.553874
designexpanpreert
## Intercept NEis2
## BIOKEY_12_Pre 1 1
## BIOKEY_17_Pre 1 2
## BIOKEY_18_Pre 1 1
## BIOKEY_21_Pre 1 2
## BIOKEY_22_Pre 1 2
## BIOKEY_24_Pre 1 2
## BIOKEY_27_Pre 1 2
## BIOKEY_3_Pre 1 2
## BIOKEY_30_Pre 1 2
## BIOKEY_4_Pre 1 2
## BIOKEY_5_Pre 1 1
## BIOKEY_6_Pre 1 2
## attr(,"assign")
## [1] 0 1
Positive logFC: group 1 (E) has a lower expression than group 2 (NE)
ttexpanpreernamest = rownames(ttexpanpreert[1:200,])
##
expanlimpreertstats = expanlimpreert$t[,2]
expanlimpreertstats %>% sort(., decreasing=TRUE) %>% head()
## ADAMTS10 SMPD3 ZYG11B TYW1B CD9 N4BP3
## 3.585189 3.535756 3.526968 3.517660 3.511949 3.349226
expanlimpreertallEntrez = AnnotationDbi::select(org.Hs.eg.db, keys=names(expanlimpreertstats), keytype="SYMBOL", columns="ENTREZID")
names(expanlimpreertstats) = expanlimpreertallEntrez$ENTREZID[ match(names(expanlimpreertstats), expanlimpreertallEntrez$SYMBOL)]
expanlimpreertstats %>% sort(., decreasing=TRUE) %>% head()
## 81794 55512 79699 441250 928 23138
## 3.585189 3.535756 3.526968 3.517660 3.511949 3.349226
expanlimpreertstats_sort = sort(expanlimpreertstats, decreasing=TRUE)
expanlimpreertrPAgsea = gseGO(expanlimpreertstats_sort,OrgDb=org.Hs.eg.db, nPerm=10000,
minGSSize=50, pvalueCutoff=0.05, maxGSSize=200, pAdjustMethod="BH", verbose=FALSE)
## Warning in .GSEA(geneList = geneList, exponent = exponent, minGSSize =
## minGSSize, : We do not recommend using nPerm parameter incurrent and future
## releases
## Warning in fgsea(pathways = geneSets, stats = geneList, nperm = nPerm, minSize
## = minGSSize, : You are trying to run fgseaSimple. It is recommended to use
## fgseaMultilevel. To run fgseaMultilevel, you need to remove the nperm argument
## in the fgsea function call.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (36.38% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize,
## gseaParam, : There are duplicate gene names, fgsea may produce unexpected
## results.
expanlimpreertrPAgsea[1:5,] %>% knitr::kable()
| ID | Description | setSize | enrichmentScore | NES | pvalue | p.adjust | qvalues | rank | leading_edge | core_enrichment | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| GO:0007416 | GO:0007416 | synapse assembly | 171 | 0.4356335 | 2.372424 | 0.0001062 | 0.0023172 | 0.0009751 | 5965 | tags=51%, list=24%, signal=40% | 5020/266727/7476/84189/10160/22941/6495/2049/7097/64084/575/9231/54413/54549/57463/147381/6092/27253/3556/1948/221935/7855/2258/6712/56123/7058/27445/9423/56122/26167/64101/1946/57502/57717/4131/4897/6386/2239/145581/9379/7474/5789/3084/5818/27067/4915/43/119/577/23284/8927/140689/1000/1141/22943/2048/23768/23767/2555/2561/2562/2596/51738/10082/2823/91156/55203/4038/94030/257194/22871/4916/5021/56126/56125/56132/56131/56130/56127/85358/51804/139065/8128/79953/387804/266977/26280/26045 |
| GO:0051963 | GO:0051963 | regulation of synapse assembly | 97 | 0.4482314 | 2.235354 | 0.0001119 | 0.0023172 | 0.0009751 | 7451 | tags=58%, list=29%, signal=41% | 5020/266727/7476/84189/10160/6495/2049/7097/64084/575/9231/54413/57463/147381/6092/3556/7058/9423/1946/2239/145581/7474/5789/5818/27067/4915/577/1141/22943/2048/23768/23767/51738/10082/94030/257194/22871/4916/5021/51804/139065/8128/79953/387804/26280/26045/627/869/23316/2895/11141/6585/114798/84631/26050/89780 |
| GO:0050907 | GO:0050907 | detection of chemical stimulus involved in sensory perception | 85 | 0.4569601 | 2.225958 | 0.0001127 | 0.0023172 | 0.0009751 | 12933 | tags=93%, list=51%, signal=46% | 563/5304/1469/5284/50840/50832/26689/1470/50839/338557/259293/259292/83756/7442/143503/284521/81285/9033/8989/1258/727924/4025/26212/26659/123041/259295/259290/54429/765/79290/401427/4994/390429/50838/259289/81472/390072/283694/135924/80835/83597/2779/390445/390174/259286/1472/259296/390083/4993/390067/132112/346562/390075/120065/50834/2780/390882/127623/26246/4995/23538/26664/79473/390275/26539/26188/138881/127069/127064/401992/343172/219429/219447/391109/347468/341416/390326/219865/284532 |
| GO:0002040 | GO:0002040 | sprouting angiogenesis | 123 | 0.4199129 | 2.181518 | 0.0001090 | 0.0023172 | 0.0009751 | 5787 | tags=42%, list=23%, signal=33% | 8829/7057/28514/187/161742/5170/6915/3791/57556/90993/196740/10014/1948/57608/2523/7422/161198/652/84830/79812/375056/10769/128240/84870/100506013/5228/7424/9037/144455/185/284/168667/29775/1012/54567/1969/2246/2303/51738/26585/4223/4804/5743/57381/6091/9723/9353/90627/6997/7010/63923/2277 |
| GO:0007156 | GO:0007156 | homophilic cell adhesion via plasma membrane adhesion molecules | 165 | 0.4024236 | 2.180987 | 0.0001065 | 0.0023172 | 0.0009751 | 10390 | tags=73%, list=41%, signal=43% | 83872/1009/5097/2195/11122/81607/64084/64221/91624/54549/1003/57463/6092/27253/221935/54510/55714/57526/9499/1004/56103/1824/56123/1952/54538/56122/26167/9620/57717/1001/56137/5098/26025/5365/9708/5818/5979/64403/90952/92211/57863/1012/1013/1000/60437/1006/4680/152330/1825/120114/79633/91156/84966/51294/5099/5101/56139/56145/56142/56134/56126/56125/56124/56121/56132/56131/56130/56129/56127/56105/56113/56112/56110/56108/56107/56104/56101/56100/56099/6091/128434/1048/56138/56144/56111/8641/53841/54798/2196/84665/28316/54825/84623/28513/27328/5100/56136/56146/56135/56128/56114/1005/152404/9752/1830/57575/56147/1008/1010/1823/1828/56102/56141/6900/56143/56140/29930/56098/1002/389118 |
sDPpretnbct = sampleDataPatienttcell[c(2:3,6:8,11:12,16,21,25:26),]
expDatpretnbct = expDattcellpre[,c(2:3,6:8,11:12,16,21,25:26)]
expanpretnbct = sDPpretnbct$expansion
expanpretnbct = as.matrix(expanpretnbct)
expanpretnbct = ifelse(sDPpretnbct$expansion=="E",1,2)
expanpretnbct = as.matrix(expanpretnbct)
rownames(expanpretnbct) = rownames(sDPpretnbct)
designexpanrownamespretnbct = sort(rownames(expanpretnbct))
designexpanpretnbct = model.matrix(~expanpretnbct)
rownames(designexpanpretnbct) = designexpanrownamespretnbct
colnames(designexpanpretnbct) = c("Intercept", "NEis2")
expanlimpretnbct = lmFit(expDatpretnbct, designexpanpretnbct)
expanlimpretnbct = eBayes(expanlimpretnbct)
ttexpanpretnbct = topTable(expanlimpretnbct, coef=2, adjust.method = "BH", n=nrow(expDatpretnbct))
ttexpanpretnbct[1:10,]
## logFC AveExpr t P.Value adj.P.Val B
## IL21 -4.337764 2.331703 -7.171387 1.450555e-05 0.2494635 -2.001404
## AL359834.1 -4.066904 1.959833 -6.744352 2.587588e-05 0.2494635 -2.086606
## HAVCR2 -3.688488 4.407431 -6.590269 3.206902e-05 0.2494635 -2.120037
## PDCD1 -2.018335 6.705807 -6.061818 6.856912e-05 0.2494635 -2.246902
## RDH10 -3.704796 1.855264 -5.905851 8.643703e-05 0.2494635 -2.288294
## ZBED2 -4.221219 3.003524 -5.474515 1.669260e-04 0.2494635 -2.413265
## LINC01229 -4.147459 0.748824 -5.376912 1.944379e-04 0.2494635 -2.443831
## GOLIM4 -3.082110 3.021727 -5.281352 2.260572e-04 0.2494635 -2.474625
## SARDH -2.142950 4.035774 -5.205846 2.548706e-04 0.2494635 -2.499577
## MYO7A -3.623746 3.343656 -5.042378 3.313702e-04 0.2494635 -2.555533
ttexpanpretnbct[1:10,]
## logFC AveExpr t P.Value adj.P.Val B
## IL21 -4.337764 2.331703 -7.171387 1.450555e-05 0.2494635 -2.001404
## AL359834.1 -4.066904 1.959833 -6.744352 2.587588e-05 0.2494635 -2.086606
## HAVCR2 -3.688488 4.407431 -6.590269 3.206902e-05 0.2494635 -2.120037
## PDCD1 -2.018335 6.705807 -6.061818 6.856912e-05 0.2494635 -2.246902
## RDH10 -3.704796 1.855264 -5.905851 8.643703e-05 0.2494635 -2.288294
## ZBED2 -4.221219 3.003524 -5.474515 1.669260e-04 0.2494635 -2.413265
## LINC01229 -4.147459 0.748824 -5.376912 1.944379e-04 0.2494635 -2.443831
## GOLIM4 -3.082110 3.021727 -5.281352 2.260572e-04 0.2494635 -2.474625
## SARDH -2.142950 4.035774 -5.205846 2.548706e-04 0.2494635 -2.499577
## MYO7A -3.623746 3.343656 -5.042378 3.313702e-04 0.2494635 -2.555533
ttexpanpretnbctsig=ttexpanpretnbct[ttexpanpretnbct$adj.P.Val<0.05,]
ttexpanpretnbctsig[1:10,]
## logFC AveExpr t P.Value adj.P.Val B
## NA NA NA NA NA NA NA
## NA.1 NA NA NA NA NA NA
## NA.2 NA NA NA NA NA NA
## NA.3 NA NA NA NA NA NA
## NA.4 NA NA NA NA NA NA
## NA.5 NA NA NA NA NA NA
## NA.6 NA NA NA NA NA NA
## NA.7 NA NA NA NA NA NA
## NA.8 NA NA NA NA NA NA
## NA.9 NA NA NA NA NA NA
rownames(ttexpanpretnbctsig)
## character(0)
match("PTN", rownames(ttexpanpretnbct))
## [1] 346
ttexpanpretnbct[346,]
## logFC AveExpr t P.Value adj.P.Val B
## PTN 3.246628 -0.04871996 2.905781 0.01374869 0.2494635 -3.563332
match("IQSEC2", rownames(ttexpanpretnbct))
## [1] 17370
ttexpanpretnbct[17370,]
## logFC AveExpr t P.Value adj.P.Val B
## IQSEC2 -0.6762304 -0.3744942 -0.7448736 0.4713396 0.6861507 -4.766041
match("CD163L1", rownames(ttexpanpretnbct))
## [1] 16020
ttexpanpretnbct[16020,]
## logFC AveExpr t P.Value adj.P.Val B
## CD163L1 0.8426568 -0.6760042 0.885095 0.3942869 0.6223831 -4.714321
match("LINC01133", rownames(ttexpanpretnbct))
## [1] 25099
ttexpanpretnbct[17370,]
## logFC AveExpr t P.Value adj.P.Val B
## IQSEC2 -0.6762304 -0.3744942 -0.7448736 0.4713396 0.6861507 -4.766041
match("HIST1H2AI", rownames(ttexpanpretnbct))
## [1] 11843
ttexpanpretnbccan[11843,]
## logFC AveExpr t P.Value adj.P.Val B
## IGLV2-18 -1.758434 -0.04561934 -1.04654 0.3125891 0.6674621 -4.594643
Negative logFC: Group 1 (E) has a higher expression than group 2 (NE)
##
expanlimpretnbctstats = expanlimpretnbct$t[,2]
expanlimpretnbctstats %>% sort(., decreasing=TRUE) %>% head()
## VIPR1 IGLV7-46 ZMIZ1-AS1 ZBTB10 SIK1B FOSB
## 4.710116 4.579561 4.393848 4.227258 4.204665 4.200772
expanlimpretnbctallEntrez = AnnotationDbi::select(org.Hs.eg.db, keys=names(expanlimpretnbctstats), keytype="SYMBOL", columns="ENTREZID")
names(expanlimpretnbctstats) = expanlimpretnbctallEntrez$ENTREZID[ match(names(expanlimpretnbctstats), expanlimpretnbctallEntrez$SYMBOL)]
expanlimpretnbctstats %>% sort(., decreasing=TRUE) %>% head()
## 7433 28775 283050 65986 <NA> 2354
## 4.710116 4.579561 4.393848 4.227258 4.204665 4.200772
expanlimpretnbctstats_sort = sort(expanlimpretnbctstats, decreasing=TRUE)
expanlimpretnbctrPAgsea = gseGO(expanlimpretnbctstats_sort,OrgDb=org.Hs.eg.db, nPerm=10000,
minGSSize=50, pvalueCutoff=0.05, maxGSSize=200, pAdjustMethod="BH", verbose=FALSE)
## Warning in .GSEA(geneList = geneList, exponent = exponent, minGSSize =
## minGSSize, : We do not recommend using nPerm parameter incurrent and future
## releases
## Warning in fgsea(pathways = geneSets, stats = geneList, nperm = nPerm, minSize
## = minGSSize, : You are trying to run fgseaSimple. It is recommended to use
## fgseaMultilevel. To run fgseaMultilevel, you need to remove the nperm argument
## in the fgsea function call.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (28.9% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize,
## gseaParam, : There are duplicate gene names, fgsea may produce unexpected
## results.
expanlimpretnbctrPAgsea[1:5,] %>% knitr::kable()
| ID | Description | setSize | enrichmentScore | NES | pvalue | p.adjust | qvalues | rank | leading_edge | core_enrichment | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| GO:0050907 | GO:0050907 | detection of chemical stimulus involved in sensory perception | 85 | 0.5410984 | 2.158437 | 0.0001026 | 0.0201266 | 0.0134323 | 8894 | tags=79%, list=35%, signal=51% | 5284/284521/4994/765/7442/1470/338557/81285/9033/8989/1258/727924/4025/26212/26659/123041/50839/54429/79290/401427/390429/50838/259289/81472/283694/135924/80835/83597/2779/390445/390174/259286/1472/343171/259296/390083/4993/390067/132112/346562/390075/120065/50834/2780/390882/127623/26246/4995/23538/26664/79473/390275/26539/26188/138881/127069/127064/401992/343172/219429/219447/391109/347468/341416/390326/219865/284532 |
| GO:0007608 | GO:0007608 | sensory perception of smell | 71 | 0.5494861 | 2.145850 | 0.0001037 | 0.0201266 | 0.0134323 | 8894 | tags=80%, list=35%, signal=52% | 284521/4685/79846/4994/2535/158046/81285/1258/727924/29989/26212/26659/123041/79290/401427/390429/81472/6531/341359/127534/1813/283694/1262/135924/100507003/390445/390174/343171/390083/4993/390067/54831/390075/120065/390882/127623/26246/4995/23538/26664/79473/390275/26539/26188/138881/127069/127064/401992/343172/219429/219447/391109/347468/341416/390326/219865/284532 |
| GO:0009593 | GO:0009593 | detection of chemical stimulus | 120 | 0.4801434 | 1.975622 | 0.0001013 | 0.0201266 | 0.0134323 | 8894 | tags=68%, list=35%, signal=44% | 5284/284521/4994/10333/765/30819/7442/27345/284297/845/1470/338557/2645/4825/81285/9033/8989/1258/727924/4025/26212/26659/123041/50839/54429/79290/401427/390429/22953/50838/259289/81472/846/10242/3170/1755/283694/135924/80835/3651/83597/2779/390445/390174/259286/1472/343171/259296/390083/4993/390067/644168/132112/346562/390075/120065/50834/2780/390882/127623/26246/4995/23538/26664/79473/390275/26539/26188/138881/127069/127064/401992/343172/219429/219447/391109/347468/341416/390326/219865/284532 |
| GO:0007606 | GO:0007606 | sensory perception of chemical stimulus | 127 | 0.4727709 | 1.954490 | 0.0001011 | 0.0201266 | 0.0134323 | 9061 | tags=69%, list=36%, signal=44% | 5284/284521/4685/79846/4994/765/7442/2535/1470/338557/158046/81285/9033/8989/1258/51764/727924/4025/29989/26212/26659/123041/50839/54429/79290/401427/390429/22953/50838/259289/338398/81472/4852/6531/341359/127534/1813/283694/40/1262/135924/80835/100507003/83597/2779/390445/390174/259286/1472/3933/343171/259296/390083/4993/390067/54831/132112/29850/346562/390075/120065/50834/2780/390882/127623/26246/4995/23538/26664/79473/390275/26539/26188/138881/127069/127064/401992/343172/219429/219447/391109/347468/341416/390326/219865/284532/2774 |
| GO:0007218 | GO:0007218 | neuropeptide signaling pathway | 88 | 0.4315170 | 1.727733 | 0.0001024 | 0.0201266 | 0.0134323 | 8895 | tags=65%, list=35%, signal=42% | 11251/10316/6653/6751/22986/84109/7301/7349/4886/4878/4922/56413/4935/2861/11248/5368/57537/116/181/2925/2847/4923/5179/8811/64106/23620/4988/5697/114815/4852/4887/5539/10886/4889/6863/9607/3061/2587/124274/4985/10888/128674/347148/6755/796/4829/2837/113091/6866/64111/344758/3062/8620/339403/84539/56923/10887 |
venn( list( "ER+" = rownames(ttexpanpreer[1:200,]), "TNBC" = rownames(ttexpanpretnbc[1:200,])))
overlap <- intersect(rownames(ttexpanpreer[1:200,]), rownames(ttexpanpretnbc[1:200,]))
cat(paste0(overlap, collapse = "\n"))
## SLC30A3
## MB
## IL21
venn( list( "ER+" = rownames(ttexpanpreer[1:500,]), "TNBC" = rownames(ttexpanpretnbc[1:500,])))
overlapa5 <- intersect(rownames(ttexpanpreer[1:500,]), rownames(ttexpanpretnbc[1:500,]))
cat(paste0(overlap, collapse = "\n"))
## SLC30A3
## MB
## IL21
venn( list( "ER+" = rownames(ttexpanpreer[1:1000,]), "TNBC" = rownames(ttexpanpretnbc[1:1000,])))
overlapa1 <- intersect(rownames(ttexpanpreer[1:1000,]), rownames(ttexpanpretnbc[1:1000,]))
cat(paste0(overlap, collapse = "\n"))
## SLC30A3
## MB
## IL21
venn( list( "ER+" = rownames(ttexpanpreercan[1:200,]), "TNBC" = rownames(ttexpanpretnbccan[1:200,])))
overlap <- intersect(rownames(ttexpanpreercan[1:200,]), rownames(ttexpanpretnbccan[1:200,]))
cat(paste0(overlap, collapse = "\n"))
## IQSEC2
venn( list( "ER+" = rownames(ttexpanpreercan[1:500,]), "TNBC" = rownames(ttexpanpretnbccan[1:500,])))
overlapc5 <- intersect(rownames(ttexpanpreercan[1:500,]), rownames(ttexpanpretnbccan[1:500,]))
cat(paste0(overlap, collapse = "\n"))
## IQSEC2
venn( list( "ER+" = rownames(ttexpanpreercan[1:1000,]), "TNBC" = rownames(ttexpanpretnbccan[1:1000,])))
overlapc1 <- intersect(rownames(ttexpanpreercan[1:1000,]), rownames(ttexpanpretnbccan[1:1000,]))
cat(paste0(overlap, collapse = "\n"))
## IQSEC2
venn( list( "ER+" = rownames(ttexpanpreert[1:200,]), "TNBC" = rownames(ttexpanpretnbct[1:200,])))
overlap <- intersect(rownames(ttexpanpreert[1:200,]), rownames(ttexpanpretnbct[1:200,]))
cat(paste0(overlap, collapse = "\n"))
## IL21
## AKAP5
## CXCL13
## AL357054.4
venn( list( "ER+" = rownames(ttexpanpreert[1:500,]), "TNBC" = rownames(ttexpanpretnbct[1:500,])))
overlapt5 <- intersect(rownames(ttexpanpreert[1:500,]), rownames(ttexpanpretnbct[1:500,]))
cat(paste0(overlap, collapse = "\n"))
## IL21
## AKAP5
## CXCL13
## AL357054.4
venn( list( "ER+" = rownames(ttexpanpreert[1:1000,]), "TNBC" = rownames(ttexpanpretnbct[1:1000,])))
overlapt1 <- intersect(rownames(ttexpanpreert[1:1000,]), rownames(ttexpanpretnbct[1:1000,]))
cat(paste0(overlap, collapse = "\n"))
## IL21
## AKAP5
## CXCL13
## AL357054.4
par(mar = c(0,0,0,0))
venn( list( "A" = overlapa1, "C" = overlapc1, "T" = overlapt1))
mtext('All cells',2, -1, cex = 1.2, las=3)
overlapat <- intersect(overlapa1, overlapt1)
overlapac <- intersect(overlapa1, overlapc1)
cat(paste0(overlapat, collapse = "\n"))
## SLC30A3
## IL21
## PDXP
## AC017002.3
## KIR2DL4
## PTN
## CXCL13
cat(paste0(overlapac, collapse = "\n"))
## IQSEC2
## MB
## CRB3
## SLC4A1
barplot(deltag["IQSEC2",], las = 3, col = bctype_cc)
barplot(expDatpre["IQSEC2",], las = 3, col = bctype_cc, margins=c(2,2,2,2))
barplot(expDatprecan["IQSEC2",], las = 3, col = bctype_cc, margins=c(2,2,2,2))
abline(h=4.4, col='black', lty=5)
abline(h=3.63, col='black', lty=1)
abline(h=1.13, col='blue', lty=1)
abline(h=0.3, col='blue', lty=5)
abline(h=4.95, col='purple', lty=5)
abline(h=0.95, col='purple', lty=1)
data = as.matrix(expDatprecan["IQSEC2",])
df <- data[order(data,decreasing = F),]
df
## BIOKEY_12_Pre BIOKEY_5_Pre BIOKEY_18_Pre BIOKEY_3_Pre BIOKEY_6_Pre
## -1.3010409 -0.3545796 0.3088695 1.1346558 1.4179783
## BIOKEY_26_Pre BIOKEY_19_Pre BIOKEY_14_Pre BIOKEY_1_Pre BIOKEY_13_Pre
## 2.0767493 2.1036044 2.3450765 2.8347727 2.8567338
## BIOKEY_8_Pre BIOKEY_24_Pre BIOKEY_2_Pre BIOKEY_9_Pre BIOKEY_4_Pre
## 3.2178139 3.5800400 3.6175320 3.6374442 3.9794584
## BIOKEY_27_Pre BIOKEY_21_Pre BIOKEY_22_Pre BIOKEY_16_Pre BIOKEY_10_Pre
## 4.1522259 4.3407669 4.3768907 4.4016459 4.7572801
## BIOKEY_30_Pre BIOKEY_28_Pre BIOKEY_11_Pre BIOKEY_17_Pre BIOKEY_15_Pre
## 4.9135300 4.9708502 5.0338518 5.1864848 5.4364103
## BIOKEY_31_Pre
## 5.8043810
expDatpre["IQSEC2",]
## T_BIOKEY_1_Pre T_BIOKEY_10_Pre T_BIOKEY_11_Pre E_BIOKEY_12_Pre H_BIOKEY_13_Pre
## 0.8592318 3.9862237 4.0861234 1.2662461 2.0609353
## T_BIOKEY_14_Pre T_BIOKEY_15_Pre T_BIOKEY_16_Pre E_BIOKEY_17_Pre E_BIOKEY_18_Pre
## 1.2416261 4.3654499 1.9916973 4.5569787 2.3700212
## T_BIOKEY_19_Pre T_BIOKEY_2_Pre E_BIOKEY_21_Pre E_BIOKEY_22_Pre E_BIOKEY_24_Pre
## 1.8818208 3.3461626 4.1603064 3.8951549 3.6275542
## T_BIOKEY_26_Pre E_BIOKEY_27_Pre H_BIOKEY_28_Pre E_BIOKEY_3_Pre E_BIOKEY_30_Pre
## 1.9205495 2.8262734 3.5882158 3.5993576 4.3830468
## T_BIOKEY_31_Pre E_BIOKEY_4_Pre E_BIOKEY_5_Pre E_BIOKEY_6_Pre T_BIOKEY_8_Pre
## 5.2361344 2.4093407 -3.8991871 1.6153011 2.1517382
## T_BIOKEY_9_Pre
## 3.4817072
#barplot(df, las=3, col=bctype, ylab="Relative expression of CD163L1", cex.lab=1.2, cex.axis=1.2, cex=1.2)
#bctype <- bctype_cc[order(df)]
par(mar = c(14,5,2,2))
barplot(expDatprecan["MB",], las = 3, col = bctype_cc, margins=c(14,2,2,2))
barplot(expDatprecan["CD48",], las = 3, col = bctype_cc)
barplot(expDatprecan["SLC4A1",], las = 3, col = bctype_cc, ylab = "Relative expression of SLC4A1", cex.lab=1.2, cex.axis=1.2, cex=1.2)
mtext(" Tumour sample", 1, 11, cex=1.2)
par(mar = c(13,5,2,0))
barplot(expDatpre["STX8",], las = 3, col = bctype_cc, ylab = "Relative expression of STX8", cex.lab=1.2, cex.axis=1.2, cex=1.2, ylim = c(-1,6))
mtext(" Tumour sample", 1, 11, cex=1.2)
barplot(expDatprecan["XBP1",], las = 3, col = bctype_cc, ylab = "Relative expression of XBP1", cex.lab=1.2, cex.axis=1.2, cex=1.2)
mtext(" Tumour sample", 1, 11, cex=1.2)
barplot(expDattcellpre["STX8",], las = 3, col = bctype_cc, ylab = "Relative expression of STX8", cex.lab=1.2, cex.axis=1.2, cex=1.2)
mtext(" Tumour sample", 1, 11, cex=1.2)
#FBXO34
barplot(expDatprecan["FMOD",], las = 3, col = bctype_cc, margins=c(2,2,2,2))
barplot(expDatprecan["OCRL",], las = 3, col = bctype_cc, margins=c(2,2,2,2))
barplot(expDatprecan["CTIF",], las = 3, col = bctype_cc, margins=c(2,2,2,2))
barplot(expDattcellpre["IGHV3-48",], las = 3, col = bctype_cc, margins=c(2,2,2,2))
preergenes <- ifelse((ttexpanpreer$logFC>2)&(ttexpanpreer$adj.P.Val<0.5),1,0)
names(preergenes) = rownames(ttexpanpreer)
pepwf = nullp(preergenes, "hg19", "geneSymbol")
head(pepwf[1:10,])
## DEgenes bias.data pwf
## CD163L1 1 4600 0.001150576
## AC025283.2 1 NA NA
## KRT17 1 1582 0.001403121
## CDKL2 0 4726 0.001150025
## RPP40 1 1166 0.001438165
## TACSTD2 1 2071 0.001327559
peGO.wall = goseq( pepwf, "hg19", "geneSymbol")
head(peGO.wall)
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 17168 GO:0090191 0.0005040954 1.0000000 1
## 85 GO:0000171 0.0005397222 0.9999999 1
## 21162 GO:1905267 0.0007250070 0.9999999 1
## 19017 GO:1900028 0.0016909112 0.9999991 1
## 2660 GO:0005655 0.0017776624 0.9999990 1
## 21194 GO:1905331 0.0018507216 0.9999989 1
## numInCat
## 17168 2
## 85 2
## 21162 3
## 19017 7
## 2660 7
## 21194 8
## term
## 17168 negative regulation of branching involved in ureteric bud morphogenesis
## 85 ribonuclease MRP activity
## 21162 endonucleolytic cleavage involved in tRNA processing
## 19017 negative regulation of ruffle assembly
## 2660 nucleolar ribonuclease P complex
## 21194 negative regulation of morphogenesis of an epithelium
## ontology
## 17168 BP
## 85 MF
## 21162 BP
## 19017 BP
## 2660 CC
## 21194 BP
peggosig = peGO.wall[peGO.wall$numInCat<500,]
head(peggosig)
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 17168 GO:0090191 0.0005040954 1.0000000 1
## 85 GO:0000171 0.0005397222 0.9999999 1
## 21162 GO:1905267 0.0007250070 0.9999999 1
## 19017 GO:1900028 0.0016909112 0.9999991 1
## 2660 GO:0005655 0.0017776624 0.9999990 1
## 21194 GO:1905331 0.0018507216 0.9999989 1
## numInCat
## 17168 2
## 85 2
## 21162 3
## 19017 7
## 2660 7
## 21194 8
## term
## 17168 negative regulation of branching involved in ureteric bud morphogenesis
## 85 ribonuclease MRP activity
## 21162 endonucleolytic cleavage involved in tRNA processing
## 19017 negative regulation of ruffle assembly
## 2660 nucleolar ribonuclease P complex
## 21194 negative regulation of morphogenesis of an epithelium
## ontology
## 17168 BP
## 85 MF
## 21162 BP
## 19017 BP
## 2660 CC
## 21194 BP
peGO.padj <- as.matrix(p.adjust(peggosig$over_represented_pvalue, method="BH"))
sum(peGO.padj < 0.05)
## [1] 0
head(peGO.padj)
## [,1]
## [1,] 1
## [2,] 1
## [3,] 1
## [4,] 1
## [5,] 1
## [6,] 1
rownames(peGO.padj) = peggosig$term
colnames(peGO.padj) = "GO term"
head(peGO.padj)
## GO term
## negative regulation of branching involved in ureteric bud morphogenesis 1
## ribonuclease MRP activity 1
## endonucleolytic cleavage involved in tRNA processing 1
## negative regulation of ruffle assembly 1
## nucleolar ribonuclease P complex 1
## negative regulation of morphogenesis of an epithelium 1
pretnbcgenes <- ifelse((ttexpanpretnbc$logFC>2) & (ttexpanpretnbc$adj.P.Val<0.5),1,0)
names(pretnbcgenes) = rownames(ttexpanpretnbc)
ptpwf = nullp(pretnbcgenes, "hg19", "geneSymbol")
head(ptpwf[1:10,])
## DEgenes bias.data pwf
## HIST1H2AI 0 469.0 0.03161733
## EBF3 1 4377.0 0.04434441
## LINC01484 0 NA NA
## SLC30A3 0 2100.0 0.03822885
## AL606807.1 0 NA NA
## EDA 1 1188.5 0.03349868
ptGO.wall = goseq( ptpwf, "hg19", "geneSymbol")
head(ptGO.wall)
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 8440 GO:0032501 7.710725e-24 1 379
## 7705 GO:0031226 4.480386e-21 1 143
## 2803 GO:0005887 7.050524e-21 1 138
## 13097 GO:0048731 9.960882e-20 1 263
## 16531 GO:0071944 5.946207e-18 1 325
## 3677 GO:0007275 2.622933e-17 1 275
## numInCat term ontology
## 8440 6366 multicellular organismal process BP
## 7705 1590 intrinsic component of plasma membrane CC
## 2803 1510 integral component of plasma membrane CC
## 13097 3988 system development BP
## 16531 5498 cell periphery CC
## 3677 4416 multicellular organism development BP
ptggosig = ptGO.wall[ptGO.wall$numInCat<500,]
head(ptggosig)
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 17923 GO:0098742 9.637264e-14 1 42
## 15357 GO:0062023 2.276289e-11 1 47
## 7405 GO:0030545 3.684846e-11 1 48
## 3811 GO:0008015 8.505493e-11 1 51
## 7406 GO:0030546 1.799592e-10 1 44
## 12800 GO:0048018 3.279963e-10 1 43
## numInCat term
## 17923 269 cell-cell adhesion via plasma-membrane adhesion molecules
## 15357 401 collagen-containing extracellular matrix
## 7405 456 signaling receptor regulator activity
## 3811 475 blood circulation
## 7406 416 signaling receptor activator activity
## 12800 409 receptor ligand activity
## ontology
## 17923 BP
## 15357 CC
## 7405 MF
## 3811 BP
## 7406 MF
## 12800 MF
ptGO.padj <- as.matrix(p.adjust(ptggosig$over_represented_pvalue, method="BH"))
sum(ptGO.padj < 0.05)
## [1] 145
head(ptGO.padj)
## [,1]
## [1,] 2.109886e-09
## [2,] 2.491740e-07
## [3,] 2.689078e-07
## [4,] 4.655269e-07
## [5,] 7.879693e-07
## [6,] 1.196804e-06
rownames(ptGO.padj) = ptggosig$term
colnames(ptGO.padj) = "GO term"
head(ptGO.padj)
## GO term
## cell-cell adhesion via plasma-membrane adhesion molecules 2.109886e-09
## collagen-containing extracellular matrix 2.491740e-07
## signaling receptor regulator activity 2.689078e-07
## blood circulation 4.655269e-07
## signaling receptor activator activity 7.879693e-07
## receptor ligand activity 1.196804e-06
#ptgosigpadj = ptGO.padj[ptGO.wall$numInCat<500,]
#head(ptgosigpadj)
preercangenes <- ifelse((ttexpanpreercan$logFC>2) & (ttexpanpreercan$adj.P.Val<0.5),1,0)
names(preercangenes) = rownames(ttexpanpreercan)
pecpwf = nullp(preercangenes, "hg19", "geneSymbol")
head(pecpwf[1:10,])
## DEgenes bias.data pwf
## STX8 1 975.0 0.06810799
## TIMM22 1 1675.0 0.08843614
## ARHGEF7 1 5393.0 0.09634920
## MKRN2 1 2771.0 0.09563165
## PARP3 1 2366.5 0.09522666
## BORCS5 1 NA NA
pecGO.wall = goseq( pecpwf, "hg19", "geneSymbol")
head(pecGO.wall)
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 2699 GO:0005737 4.464147e-13 1 1093
## 2648 GO:0005622 5.360956e-12 1 1292
## 11101 GO:0043227 6.344265e-10 1 1165
## 20289 GO:1903561 6.887543e-09 1 241
## 15462 GO:0070062 7.169034e-09 1 239
## 11104 GO:0043230 7.205187e-09 1 241
## numInCat term ontology
## 2699 11024 cytoplasm CC
## 2648 13540 intracellular anatomical structure CC
## 11101 12102 membrane-bounded organelle CC
## 20289 1988 extracellular vesicle CC
## 15462 1970 extracellular exosome CC
## 11104 1989 extracellular organelle CC
pecggosig = pecGO.wall[pecGO.wall$numInCat<500,]
head(pecggosig)
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 15357 GO:0062023 5.860226e-08 1.0000000 68
## 8381 GO:0032432 1.078969e-06 0.9999998 21
## 2468 GO:0005201 1.161613e-06 0.9999996 35
## 10041 GO:0035966 1.779983e-06 0.9999994 33
## 9527 GO:0034976 3.257405e-06 0.9999987 45
## 16870 GO:0072659 1.217723e-05 0.9999946 47
## numInCat term ontology
## 15357 401 collagen-containing extracellular matrix CC
## 8381 73 actin filament bundle CC
## 2468 162 extracellular matrix structural constituent MF
## 10041 155 response to topologically incorrect protein BP
## 9527 248 response to endoplasmic reticulum stress BP
## 16870 276 protein localization to plasma membrane BP
pecGO.padj <- as.matrix(p.adjust(pecggosig$over_represented_pvalue, method="BH"))
sum(pecGO.padj < 0.05)
## [1] 8
head(pecGO.padj)
## [,1]
## [1,] 0.001282979
## [2,] 0.008477067
## [3,] 0.008477067
## [4,] 0.009742293
## [5,] 0.014262875
## [6,] 0.044432680
rownames(pecGO.padj) = pecggosig$term
colnames(pecGO.padj) = "GO term"
head(pecGO.padj)
## GO term
## collagen-containing extracellular matrix 0.001282979
## actin filament bundle 0.008477067
## extracellular matrix structural constituent 0.008477067
## response to topologically incorrect protein 0.009742293
## response to endoplasmic reticulum stress 0.014262875
## protein localization to plasma membrane 0.044432680
pretnbccangenes <- ifelse((ttexpanpretnbccan$logFC>2) & (ttexpanpretnbccan$adj.P.Val<0.5),1,0)
names(pretnbccangenes) = rownames(ttexpanpretnbccan)
ptcpwf = nullp(pretnbccangenes, "hg19", "geneSymbol")
head(ptcpwf[1:10,])
## DEgenes bias.data pwf
## LINC01133 0 NA NA
## RNF128 0 2763.5 0.001003914
## SPIRE2 0 3259.0 0.001002940
## CXCL13 0 1206.0 0.001004409
## NXPH3 0 5552.0 0.001000005
## AP001453.2 0 NA NA
ptcGO.wall = goseq( ptcpwf, "hg19", "geneSymbol")
head(ptcGO.wall)
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 1 GO:0000002 1 1 0
## 2 GO:0000003 1 1 0
## 3 GO:0000009 1 1 0
## 4 GO:0000010 1 1 0
## 5 GO:0000012 1 1 0
## 6 GO:0000014 1 1 0
## numInCat term ontology
## 1 31 mitochondrial genome maintenance BP
## 2 1329 reproduction BP
## 3 2 alpha-1,6-mannosyltransferase activity MF
## 4 2 trans-hexaprenyltranstransferase activity MF
## 5 11 single strand break repair BP
## 6 10 single-stranded DNA endodeoxyribonuclease activity MF
ptcggosig = ptcGO.wall[ptcGO.wall$numInCat<500,]
head(ptcggosig)
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 1 GO:0000002 1 1 0
## 3 GO:0000009 1 1 0
## 4 GO:0000010 1 1 0
## 5 GO:0000012 1 1 0
## 6 GO:0000014 1 1 0
## 7 GO:0000015 1 1 0
## numInCat term ontology
## 1 31 mitochondrial genome maintenance BP
## 3 2 alpha-1,6-mannosyltransferase activity MF
## 4 2 trans-hexaprenyltranstransferase activity MF
## 5 11 single strand break repair BP
## 6 10 single-stranded DNA endodeoxyribonuclease activity MF
## 7 4 phosphopyruvate hydratase complex CC
ptcGO.padj <- as.matrix(p.adjust(ptcggosig$over_represented_pvalue, method="BH"))
sum(ptcGO.padj < 0.05)
## [1] 0
head(ptcGO.padj)
## [,1]
## [1,] 1
## [2,] 1
## [3,] 1
## [4,] 1
## [5,] 1
## [6,] 1
rownames(ptcGO.padj) = ptcggosig$term
colnames(ptcGO.padj) = "GO term"
head(ptcGO.padj)
## GO term
## mitochondrial genome maintenance 1
## alpha-1,6-mannosyltransferase activity 1
## trans-hexaprenyltranstransferase activity 1
## single strand break repair 1
## single-stranded DNA endodeoxyribonuclease activity 1
## phosphopyruvate hydratase complex 1
preertgenes <- ifelse((ttexpanpreert$logFC>2) & (ttexpanpreert$adj.P.Val<0.5),1,0)
names(preertgenes) = rownames(ttexpanpreert)
petpwf = nullp(preertgenes, "hg19", "geneSymbol")
head(petpwf[1:10,])
## DEgenes bias.data pwf
## CCL3L1 0 786.0 0.001
## IGHV3-48 0 NA NA
## ADAMTS10 0 4263.0 0.001
## SMPD3 0 5276.0 0.001
## ZYG11B 0 8154.0 0.001
## TYW1B 0 2393.5 0.001
petGO.wall = goseq( petpwf, "hg19", "geneSymbol")
head(petGO.wall)
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 1 GO:0000002 1 1 0
## 2 GO:0000003 1 1 0
## 3 GO:0000009 1 1 0
## 4 GO:0000010 1 1 0
## 5 GO:0000012 1 1 0
## 6 GO:0000014 1 1 0
## numInCat term ontology
## 1 31 mitochondrial genome maintenance BP
## 2 1329 reproduction BP
## 3 2 alpha-1,6-mannosyltransferase activity MF
## 4 2 trans-hexaprenyltranstransferase activity MF
## 5 11 single strand break repair BP
## 6 10 single-stranded DNA endodeoxyribonuclease activity MF
petggosig = petGO.wall[petGO.wall$numInCat<500,]
head(petggosig)
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 1 GO:0000002 1 1 0
## 3 GO:0000009 1 1 0
## 4 GO:0000010 1 1 0
## 5 GO:0000012 1 1 0
## 6 GO:0000014 1 1 0
## 7 GO:0000015 1 1 0
## numInCat term ontology
## 1 31 mitochondrial genome maintenance BP
## 3 2 alpha-1,6-mannosyltransferase activity MF
## 4 2 trans-hexaprenyltranstransferase activity MF
## 5 11 single strand break repair BP
## 6 10 single-stranded DNA endodeoxyribonuclease activity MF
## 7 4 phosphopyruvate hydratase complex CC
petGO.padj <- as.matrix(p.adjust(petggosig$over_represented_pvalue, method="BH"))
sum(petGO.padj < 0.05)
## [1] 0
head(petGO.padj)
## [,1]
## [1,] 1
## [2,] 1
## [3,] 1
## [4,] 1
## [5,] 1
## [6,] 1
rownames(petGO.padj) = petggosig$term
colnames(petGO.padj) = "GO term"
head(petGO.padj)
## GO term
## mitochondrial genome maintenance 1
## alpha-1,6-mannosyltransferase activity 1
## trans-hexaprenyltranstransferase activity 1
## single strand break repair 1
## single-stranded DNA endodeoxyribonuclease activity 1
## phosphopyruvate hydratase complex 1
pretnbctgenes <- ifelse((ttexpanpretnbct$logFC>2) & (ttexpanpretnbct$adj.P.Val<0.5),1,0)
names(pretnbctgenes) = rownames(ttexpanpretnbct)
pttpwf = nullp(pretnbctgenes, "hg19", "geneSymbol")
head(pttpwf[1:10,])
## DEgenes bias.data pwf
## IL21 0 621 0.03829179
## AL359834.1 0 NA NA
## HAVCR2 0 2327 0.03980521
## PDCD1 0 2116 0.03961800
## RDH10 0 3535 0.04087719
## ZBED2 0 2188 0.03968188
pttGO.wall = goseq(pttpwf, "hg19", "geneSymbol")
head(pttGO.wall)
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 16531 GO:0071944 4.508184e-18 1 340
## 2802 GO:0005886 2.880957e-12 1 297
## 7303 GO:0030312 2.304307e-10 1 56
## 2590 GO:0005509 2.443299e-10 1 65
## 2803 GO:0005887 5.121150e-10 1 113
## 7597 GO:0031012 5.898720e-10 1 55
## numInCat term ontology
## 16531 5498 cell periphery CC
## 2802 5049 plasma membrane CC
## 7303 533 external encapsulating structure CC
## 2590 667 calcium ion binding MF
## 2803 1510 integral component of plasma membrane CC
## 7597 532 extracellular matrix CC
pttggosig = pttGO.wall[pttGO.wall$numInCat<500,]
head(pttggosig)
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 15357 GO:0062023 5.387174e-09 1.0000000 44
## 6644 GO:0019814 4.389752e-06 0.9999989 20
## 3594 GO:0007156 9.658510e-06 0.9999972 21
## 2307 GO:0004930 1.064044e-05 0.9999957 36
## 7405 GO:0030545 1.282822e-05 0.9999946 39
## 439 GO:0001525 1.652530e-05 0.9999927 42
## numInCat term
## 15357 401 collagen-containing extracellular matrix
## 6644 151 immunoglobulin complex
## 3594 165 homophilic cell adhesion via plasma membrane adhesion molecules
## 2307 400 G protein-coupled receptor activity
## 7405 456 signaling receptor regulator activity
## 439 496 angiogenesis
## ontology
## 15357 CC
## 6644 CC
## 3594 BP
## 2307 MF
## 7405 MF
## 439 BP
pttGO.padj <- as.matrix(p.adjust(pttggosig$over_represented_pvalue, method="BH"))
sum(pttGO.padj < 0.05)
## [1] 2
head(pttGO.padj)
## [,1]
## [1,] 0.0001179414
## [2,] 0.0480524198
## [3,] 0.0561696487
## [4,] 0.0561696487
## [5,] 0.0561696487
## [6,] 0.0602980769
rownames(pttGO.padj) = pttggosig$term
colnames(pttGO.padj) = "GO term"
head(pttGO.padj)
## GO term
## collagen-containing extracellular matrix 0.0001179414
## immunoglobulin complex 0.0480524198
## homophilic cell adhesion via plasma membrane adhesion molecules 0.0561696487
## G protein-coupled receptor activity 0.0561696487
## signaling receptor regulator activity 0.0561696487
## angiogenesis 0.0602980769
barplot(expDattcellpre["IGLV2-8",], las = 3, col = bctype_cc, margins=c(2,2,2,2))
## Warning in plot.window(xlim, ylim, log = log, ...): "margins" is not a graphical
## parameter
## Warning in axis(if (horiz) 2 else 1, at = at.l, labels = names.arg, lty =
## axis.lty, : "margins" is not a graphical parameter
## Warning in title(main = main, sub = sub, xlab = xlab, ylab = ylab, ...):
## "margins" is not a graphical parameter
## Warning in axis(if (horiz) 1 else 2, cex.axis = cex.axis, ...): "margins" is not
## a graphical parameter